diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-6d28cc131b0cad61c681aed6b9f6611a12b.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-6d28cc131b0cad61c681aed6b9f6611a12b.patch new file mode 100644 index 00000000..74f2018c --- /dev/null +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-6d28cc131b0cad61c681aed6b9f6611a12b.patch @@ -0,0 +1,1260 @@ +From b95b0da9efaa79a639d9425769ab01aae98b475d Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 3 Sep 2024 07:18:49 +1000 +Subject: [PATCH] Updated vkd3d to 6d28cc131b0cad61c681aed6b9f6611a12b352d1. + +--- + libs/vkd3d/include/private/vkd3d_common.h | 2 +- + libs/vkd3d/libs/vkd3d-common/blob.c | 1 + + libs/vkd3d/libs/vkd3d-shader/checksum.c | 49 ++++--- + libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 40 +++--- + libs/vkd3d/libs/vkd3d-shader/dxbc.c | 19 ++- + libs/vkd3d/libs/vkd3d-shader/fx.c | 95 ++++++------ + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 6 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 10 +- + libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + + libs/vkd3d/libs/vkd3d-shader/spirv.c | 66 ++++----- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 136 +++++++++--------- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 107 ++++++++++---- + .../libs/vkd3d-shader/vkd3d_shader_private.h | 9 +- + 13 files changed, 316 insertions(+), 225 deletions(-) + +diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h +index 39145a97df1..c62dc00415f 100644 +--- a/libs/vkd3d/include/private/vkd3d_common.h ++++ b/libs/vkd3d/include/private/vkd3d_common.h +@@ -273,7 +273,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) + { + #ifdef _MSC_VER + return __popcnt(v); +-#elif defined(__MINGW32__) ++#elif defined(HAVE_BUILTIN_POPCOUNT) + return __builtin_popcount(v); + #else + v -= (v >> 1) & 0x55555555; +diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c +index f60ef7db769..c2c6ad67804 100644 +--- a/libs/vkd3d/libs/vkd3d-common/blob.c ++++ 
b/libs/vkd3d/libs/vkd3d-common/blob.c +@@ -20,6 +20,7 @@ + #define WIDL_C_INLINE_WRAPPERS + #endif + #define COBJMACROS ++ + #define CONST_VTABLE + #include "vkd3d.h" + #include "vkd3d_blob.h" +diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c +index d9560628c77..45de1c92513 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/checksum.c ++++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c +@@ -33,6 +33,11 @@ + * will fill a supplied 16-byte array with the digest. + */ + ++/* ++ * DXBC uses a variation of the MD5 algorithm, which only changes the way ++ * the message is padded in the final step. ++ */ ++ + #include "vkd3d_shader_private.h" + + #define DXBC_CHECKSUM_BLOCK_SIZE 64 +@@ -230,10 +235,9 @@ static void md5_update(struct md5_ctx *ctx, const unsigned char *buf, unsigned i + memcpy(ctx->in, buf, len); + } + +-static void dxbc_checksum_final(struct md5_ctx *ctx) ++static void md5_final(struct md5_ctx *ctx, enum vkd3d_md5_variant variant) + { + unsigned int padding; +- unsigned int length; + unsigned int count; + unsigned char *p; + +@@ -260,7 +264,7 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) + /* Now fill the next block */ + memset(ctx->in, 0, DXBC_CHECKSUM_BLOCK_SIZE); + } +- else ++ else if (variant == VKD3D_MD5_DXBC) + { + /* Make place for bitcount at the beginning of the block */ + memmove(&ctx->in[4], ctx->in, count); +@@ -268,33 +272,44 @@ static void dxbc_checksum_final(struct md5_ctx *ctx) + /* Pad block to 60 bytes */ + memset(p + 4, 0, padding - 4); + } ++ else ++ { ++ /* Pad block to 56 bytes */ ++ memset(p, 0, padding - 8); ++ } + + /* Append length in bits and transform */ +- length = ctx->i[0]; +- memcpy(&ctx->in[0], &length, sizeof(length)); +- byte_reverse(&ctx->in[4], 14); +- length = ctx->i[0] >> 2 | 0x1; +- memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); ++ if (variant == VKD3D_MD5_DXBC) ++ { ++ unsigned int length; ++ ++ length = ctx->i[0]; ++ memcpy(&ctx->in[0], &length, 
sizeof(length)); ++ byte_reverse(&ctx->in[4], 14); ++ length = ctx->i[0] >> 2 | 0x1; ++ memcpy(&ctx->in[DXBC_CHECKSUM_BLOCK_SIZE - 4], &length, sizeof(length)); ++ } ++ else ++ { ++ byte_reverse(ctx->in, 14); ++ ++ ((unsigned int *)ctx->in)[14] = ctx->i[0]; ++ ((unsigned int *)ctx->in)[15] = ctx->i[1]; ++ } + + md5_transform(ctx->buf, (unsigned int *)ctx->in); + byte_reverse((unsigned char *)ctx->buf, 4); + memcpy(ctx->digest, ctx->buf, 16); + } + +-#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 +- +-void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]) ++void vkd3d_compute_md5(const void *data, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant) + { +- const uint8_t *ptr = dxbc; ++ const uint8_t *ptr = data; + struct md5_ctx ctx; + +- VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); +- ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; +- size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; +- + md5_init(&ctx); + md5_update(&ctx, ptr, size); +- dxbc_checksum_final(&ctx); ++ md5_final(&ctx, variant); + + memcpy(checksum, ctx.digest, sizeof(ctx.digest)); + } +diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +index d05394c3ab7..1145a91f3e6 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +@@ -1423,32 +1423,32 @@ bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, + } + + bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +- uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) ++ uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx) + { + static const struct + { + const char *name; +- D3DDECLUSAGE usage; ++ enum vkd3d_decl_usage usage; + } + semantics[] = + { +- {"binormal", D3DDECLUSAGE_BINORMAL}, +- {"blendindices", D3DDECLUSAGE_BLENDINDICES}, +- {"blendweight", D3DDECLUSAGE_BLENDWEIGHT}, +- {"color", D3DDECLUSAGE_COLOR}, +- {"depth", D3DDECLUSAGE_DEPTH}, +- {"fog", D3DDECLUSAGE_FOG}, +- 
{"normal", D3DDECLUSAGE_NORMAL}, +- {"position", D3DDECLUSAGE_POSITION}, +- {"positiont", D3DDECLUSAGE_POSITIONT}, +- {"psize", D3DDECLUSAGE_PSIZE}, +- {"sample", D3DDECLUSAGE_SAMPLE}, +- {"sv_depth", D3DDECLUSAGE_DEPTH}, +- {"sv_position", D3DDECLUSAGE_POSITION}, +- {"sv_target", D3DDECLUSAGE_COLOR}, +- {"tangent", D3DDECLUSAGE_TANGENT}, +- {"tessfactor", D3DDECLUSAGE_TESSFACTOR}, +- {"texcoord", D3DDECLUSAGE_TEXCOORD}, ++ {"binormal", VKD3D_DECL_USAGE_BINORMAL}, ++ {"blendindices", VKD3D_DECL_USAGE_BLEND_INDICES}, ++ {"blendweight", VKD3D_DECL_USAGE_BLEND_WEIGHT}, ++ {"color", VKD3D_DECL_USAGE_COLOR}, ++ {"depth", VKD3D_DECL_USAGE_DEPTH}, ++ {"fog", VKD3D_DECL_USAGE_FOG}, ++ {"normal", VKD3D_DECL_USAGE_NORMAL}, ++ {"position", VKD3D_DECL_USAGE_POSITION}, ++ {"positiont", VKD3D_DECL_USAGE_POSITIONT}, ++ {"psize", VKD3D_DECL_USAGE_PSIZE}, ++ {"sample", VKD3D_DECL_USAGE_SAMPLE}, ++ {"sv_depth", VKD3D_DECL_USAGE_DEPTH}, ++ {"sv_position", VKD3D_DECL_USAGE_POSITION}, ++ {"sv_target", VKD3D_DECL_USAGE_COLOR}, ++ {"tangent", VKD3D_DECL_USAGE_TANGENT}, ++ {"tessfactor", VKD3D_DECL_USAGE_TESS_FACTOR}, ++ {"texcoord", VKD3D_DECL_USAGE_TEXCOORD}, + }; + + unsigned int i; +@@ -2203,8 +2203,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct sm1_dst_register reg = {0}; ++ enum vkd3d_decl_usage usage; + uint32_t token, usage_idx; +- D3DDECLUSAGE usage; + bool ret; + + if (hlsl_sm1_register_from_semantic(version, element->semantic_name, +diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +index 184788dc57e..93fc993e0d1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c ++++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c +@@ -20,6 +20,19 @@ + + #include "vkd3d_shader_private.h" + ++#define DXBC_CHECKSUM_SKIP_BYTE_COUNT 20 ++ ++static void compute_dxbc_checksum(const void *dxbc, size_t size, 
uint32_t checksum[4]) ++{ ++ const uint8_t *ptr = dxbc; ++ ++ VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); ++ ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; ++ size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; ++ ++ vkd3d_compute_md5(ptr, size, checksum, VKD3D_MD5_DXBC); ++} ++ + void dxbc_writer_init(struct dxbc_writer *dxbc) + { + memset(dxbc, 0, sizeof(*dxbc)); +@@ -72,7 +85,7 @@ int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_ + } + set_u32(&buffer, size_position, bytecode_get_size(&buffer)); + +- vkd3d_compute_dxbc_checksum(buffer.data, buffer.size, checksum); ++ compute_dxbc_checksum(buffer.data, buffer.size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&buffer, checksum_position + i * sizeof(uint32_t), checksum[i]); + +@@ -188,7 +201,7 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ + checksum[3] = read_u32(&ptr); + if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) + { +- vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); ++ compute_dxbc_checksum(data, data_size, calculated_checksum); + if (memcmp(checksum, calculated_checksum, sizeof(checksum))) + { + WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " +@@ -1488,7 +1501,7 @@ int vkd3d_shader_serialize_root_signature(const struct vkd3d_shader_versioned_ro + dxbc->code = context.buffer.data; + dxbc->size = total_size; + +- vkd3d_compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); ++ compute_dxbc_checksum(dxbc->code, dxbc->size, checksum); + for (i = 0; i < 4; ++i) + set_u32(&context.buffer, (i + 1) * sizeof(uint32_t), checksum[i]); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index a1d1fd6572f..e3ab71fb386 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -575,6 +575,12 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) + } + } + ++static bool is_numeric_fx_4_type(const struct hlsl_type *type) ++{ ++ type = 
hlsl_get_multiarray_element_type(type); ++ return type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type); ++} ++ + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) + { + struct field_offsets +@@ -584,43 +590,41 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + uint32_t offset; + uint32_t type; + }; +- uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; ++ uint32_t name_offset, offset, unpacked_size, packed_size, stride, numeric_desc; + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + struct field_offsets *field_offsets = NULL; ++ const struct hlsl_type *element_type; + struct hlsl_ctx *ctx = fx->ctx; + uint32_t elements_count = 0; + const char *name; + size_t i; + +- /* Resolve arrays to element type and number of elements. */ + if (type->class == HLSL_CLASS_ARRAY) +- { + elements_count = hlsl_get_multiarray_size(type); +- type = hlsl_get_multiarray_element_type(type); +- } ++ element_type = hlsl_get_multiarray_element_type(type); + +- name = get_fx_4_type_name(type); ++ name = get_fx_4_type_name(element_type); + + name_offset = write_string(name, fx); +- if (type->class == HLSL_CLASS_STRUCT) ++ if (element_type->class == HLSL_CLASS_STRUCT) + { +- if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) ++ if (!(field_offsets = hlsl_calloc(ctx, element_type->e.record.field_count, sizeof(*field_offsets)))) + return 0; + +- for (i = 0; i < type->e.record.field_count; ++i) ++ for (i = 0; i < element_type->e.record.field_count; ++i) + { +- const struct hlsl_struct_field *field = &type->e.record.fields[i]; ++ const struct hlsl_struct_field *field = &element_type->e.record.fields[i]; + + field_offsets[i].name = write_string(field->name, fx); + field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); +- field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; ++ field_offsets[i].offset = 
field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float); + field_offsets[i].type = write_type(field->type, fx); + } + } + + offset = put_u32_unaligned(buffer, name_offset); + +- switch (type->class) ++ switch (element_type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: +@@ -659,32 +663,32 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + vkd3d_unreachable(); + + case HLSL_CLASS_VOID: +- FIXME("Writing type class %u is not implemented.\n", type->class); ++ FIXME("Writing type class %u is not implemented.\n", element_type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + return 0; + } + + /* Structures can only contain numeric fields, this is validated during variable declaration. */ +- total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); ++ unpacked_size = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); ++ + packed_size = 0; +- if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) +- packed_size = hlsl_type_component_count(type) * sizeof(float); ++ if (is_numeric_fx_4_type(element_type)) ++ packed_size = hlsl_type_component_count(element_type) * sizeof(float); + if (elements_count) +- { +- total_size *= elements_count; + packed_size *= elements_count; +- } ++ ++ stride = element_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + stride = align(stride, 4 * sizeof(float)); + + put_u32_unaligned(buffer, elements_count); +- put_u32_unaligned(buffer, total_size); ++ put_u32_unaligned(buffer, unpacked_size); + put_u32_unaligned(buffer, stride); + put_u32_unaligned(buffer, packed_size); + +- if (type->class == HLSL_CLASS_STRUCT) ++ if (element_type->class == HLSL_CLASS_STRUCT) + { +- put_u32_unaligned(buffer, type->e.record.field_count); +- for (i = 0; i < type->e.record.field_count; ++i) ++ put_u32_unaligned(buffer, element_type->e.record.field_count); ++ for (i = 0; i < element_type->e.record.field_count; ++i) + { + const struct field_offsets *field = 
&field_offsets[i]; + +@@ -700,7 +704,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + put_u32_unaligned(buffer, 0); /* Interface count */ + } + } +- else if (type->class == HLSL_CLASS_TEXTURE) ++ else if (element_type->class == HLSL_CLASS_TEXTURE) + { + static const uint32_t texture_type[] = + { +@@ -716,13 +720,13 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + [HLSL_SAMPLER_DIM_CUBEARRAY] = 23, + }; + +- put_u32_unaligned(buffer, texture_type[type->sampler_dim]); ++ put_u32_unaligned(buffer, texture_type[element_type->sampler_dim]); + } +- else if (type->class == HLSL_CLASS_SAMPLER) ++ else if (element_type->class == HLSL_CLASS_SAMPLER) + { + put_u32_unaligned(buffer, 21); + } +- else if (type->class == HLSL_CLASS_UAV) ++ else if (element_type->class == HLSL_CLASS_UAV) + { + static const uint32_t uav_type[] = + { +@@ -735,60 +739,60 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co + [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = 40, + }; + +- put_u32_unaligned(buffer, uav_type[type->sampler_dim]); ++ put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]); + } +- else if (type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) ++ else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW) + { + put_u32_unaligned(buffer, 20); + } +- else if (type->class == HLSL_CLASS_RENDER_TARGET_VIEW) ++ else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW) + { + put_u32_unaligned(buffer, 19); + } +- else if (type->class == HLSL_CLASS_PIXEL_SHADER) ++ else if (element_type->class == HLSL_CLASS_PIXEL_SHADER) + { + put_u32_unaligned(buffer, 5); + } +- else if (type->class == HLSL_CLASS_VERTEX_SHADER) ++ else if (element_type->class == HLSL_CLASS_VERTEX_SHADER) + { + put_u32_unaligned(buffer, 6); + } +- else if (type->class == HLSL_CLASS_RASTERIZER_STATE) ++ else if (element_type->class == HLSL_CLASS_RASTERIZER_STATE) + { + put_u32_unaligned(buffer, 4); + } +- else if 
(type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) ++ else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) + { + put_u32_unaligned(buffer, 3); + } +- else if (type->class == HLSL_CLASS_BLEND_STATE) ++ else if (element_type->class == HLSL_CLASS_BLEND_STATE) + { + put_u32_unaligned(buffer, 2); + } +- else if (type->class == HLSL_CLASS_STRING) ++ else if (element_type->class == HLSL_CLASS_STRING) + { + put_u32_unaligned(buffer, 1); + } +- else if (hlsl_is_numeric_type(type)) ++ else if (hlsl_is_numeric_type(element_type)) + { +- numeric_desc = get_fx_4_numeric_type_description(type, fx); ++ numeric_desc = get_fx_4_numeric_type_description(element_type, fx); + put_u32_unaligned(buffer, numeric_desc); + } +- else if (type->class == HLSL_CLASS_COMPUTE_SHADER) ++ else if (element_type->class == HLSL_CLASS_COMPUTE_SHADER) + { + put_u32_unaligned(buffer, 28); + } +- else if (type->class == HLSL_CLASS_HULL_SHADER) ++ else if (element_type->class == HLSL_CLASS_HULL_SHADER) + { + put_u32_unaligned(buffer, 29); + } +- else if (type->class == HLSL_CLASS_DOMAIN_SHADER) ++ else if (element_type->class == HLSL_CLASS_DOMAIN_SHADER) + { + put_u32_unaligned(buffer, 30); + } + else + { +- FIXME("Type %u is not supported.\n", type->class); ++ FIXME("Type %u is not supported.\n", element_type->class); + set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + } + +@@ -2126,7 +2130,7 @@ static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, + } + + /* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState +- object, and only when fx_5_0 profile is used. */ ++ object, and only when fx_4_1 or fx_5_0 profile is used. 
*/ + static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, + unsigned int entry_index, struct fx_write_context *fx) + { +@@ -2140,7 +2144,7 @@ static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var * + + if (type->class != HLSL_CLASS_BLEND_STATE) + return 1; +- if (ctx->profile->major_version != 5) ++ if (hlsl_version_lt(ctx, 4, 1)) + return 1; + if (entry->lhs_has_index) + return 1; +@@ -2401,6 +2405,9 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx + size = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { ++ if (!is_numeric_fx_4_type(var->data_type)) ++ continue; ++ + if (var->buffer != b) + continue; + +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +index 22e25b23988..bf38c0cd945 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h +@@ -1528,7 +1528,7 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); + bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); + bool hlsl_sm1_usage_from_semantic(const char *semantic_name, +- uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); ++ uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx); + + void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); + int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, +@@ -1536,8 +1536,8 @@ int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); + +-bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, +- const struct 
hlsl_semantic *semantic, bool output, D3D_NAME *usage); ++bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, ++ struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output); + bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, + bool output, enum vkd3d_shader_register_type *type, bool *has_idx); + int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index 92b5c71c43f..154328a64c3 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -5128,7 +5128,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + if (ctx->profile->major_version < 4) + { + struct vkd3d_shader_version version; +- D3DDECLUSAGE usage; ++ enum vkd3d_decl_usage usage; + uint32_t usage_idx; + + /* ps_1_* outputs are special and go in temp register 0. 
*/ +@@ -5152,10 +5152,10 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var + } + else + { +- D3D_NAME usage; ++ enum vkd3d_shader_sysval_semantic semantic; + bool has_idx; + +- if (!hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage)) ++ if (!sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Invalid semantic '%s'.", var->semantic.name); +@@ -5956,8 +5956,8 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, + if (!hlsl_sm1_register_from_semantic(&program->shader_version, + var->semantic.name, var->semantic.index, output, &type, &register_index)) + { ++ enum vkd3d_decl_usage usage; + unsigned int usage_idx; +- D3DDECLUSAGE usage; + bool ret; + + register_index = var->regs[HLSL_REGSET_NUMERIC].id; +@@ -5969,7 +5969,7 @@ static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, + * SV_Position; the closer equivalent is VPOS, which is not declared + * as a semantic. 
*/ + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX +- && output && usage == D3DDECLUSAGE_POSITION) ++ && output && usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + } + mask = (1 << var->data_type->dimx) - 1; +diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l +index 2b7455a5c30..7fc963192cf 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/preproc.l ++++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l +@@ -20,6 +20,7 @@ + + %{ + ++#include "preproc.h" + #include "preproc.tab.h" + + #undef ERROR /* defined in wingdi.h */ +diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c +index 49979ab2491..8052e951704 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c ++++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c +@@ -6120,12 +6120,12 @@ static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, + static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, + SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, + const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, +- bool is_uav, bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) ++ const struct vkd3d_shader_descriptor_info1 *descriptor, bool is_uav_counter, ++ struct vkd3d_descriptor_variable_info *var_info) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + struct vkd3d_descriptor_binding_address binding_address; + struct vkd3d_shader_descriptor_binding binding; +- const struct vkd3d_shader_descriptor_info1 *d; + uint32_t array_type_id, ptr_type_id, var_id; + bool write_only = false, coherent = false; + struct vkd3d_symbol symbol; +@@ -6135,12 +6135,11 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * + resource_type, is_uav_counter, &binding_address); + var_info->binding_base_idx = binding_address.binding_base_idx; + +- if 
(is_uav) ++ if (descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV && !is_uav_counter) + { +- d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); +- write_only = !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); ++ write_only = !(descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + /* ROVs are implicitly globally coherent. */ +- coherent = d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); ++ coherent = descriptor->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); + } + + if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u +@@ -6194,11 +6193,12 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * + } + + static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, +- const struct vkd3d_shader_register_range *range, unsigned int register_id, unsigned int size_in_bytes) ++ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) + { + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + uint32_t vec4_id, array_type_id, length_id, struct_id, var_id; + const SpvStorageClass storage_class = SpvStorageClassUniform; ++ unsigned int size_in_bytes = descriptor->buffer_size; + struct vkd3d_push_constant_buffer_binding *push_cb; + struct vkd3d_descriptor_variable_info var_info; + struct vkd3d_shader_register reg; +@@ -6206,7 +6206,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, + unsigned int size; + + vsir_register_init(&reg, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 3); +- reg.idx[0].offset = register_id; ++ reg.idx[0].offset = descriptor->register_id; + reg.idx[1].offset = range->first; + reg.idx[2].offset = range->last; + +@@ -6239,7 +6239,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, + vkd3d_spirv_build_op_name(builder, 
struct_id, "cb%u_struct", size); + + var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, +- &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, false, false, &var_info); ++ &reg, range, VKD3D_SHADER_RESOURCE_BUFFER, descriptor, false, &var_info); + + vkd3d_symbol_make_register(&reg_symbol, &reg); + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, +@@ -6275,7 +6275,7 @@ static void spirv_compiler_emit_dcl_immediate_constant_buffer(struct spirv_compi + } + + static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compiler, +- const struct vkd3d_shader_register_range *range, unsigned int register_id) ++ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) + { + const SpvStorageClass storage_class = SpvStorageClassUniformConstant; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; +@@ -6285,7 +6285,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi + uint32_t type_id, var_id; + + vsir_register_init(&reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1); +- reg.idx[0].offset = register_id; ++ reg.idx[0].offset = descriptor->register_id; + + vkd3d_symbol_make_sampler(&reg_symbol, &reg); + reg_symbol.info.sampler.range = *range; +@@ -6295,8 +6295,8 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi + return; + + type_id = vkd3d_spirv_get_op_type_sampler(builder); +- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, +- range, VKD3D_SHADER_RESOURCE_NONE, false, false, &var_info); ++ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ++ &reg, range, VKD3D_SHADER_RESOURCE_NONE, descriptor, false, &var_info); + + vkd3d_symbol_make_register(&reg_symbol, &reg); + vkd3d_symbol_set_register_info(&reg_symbol, var_id, storage_class, +@@ -6461,21 +6461,24 @@ static void spirv_compiler_emit_combined_sampler_declarations(struct spirv_compi + } + + static void 
spirv_compiler_emit_resource_declaration(struct spirv_compiler *compiler, +- const struct vkd3d_shader_register_range *range, unsigned int register_id, +- unsigned int sample_count, bool is_uav, enum vkd3d_shader_resource_type resource_type, +- enum vkd3d_shader_resource_data_type resource_data_type, unsigned int structure_stride, bool raw) ++ const struct vkd3d_shader_register_range *range, const struct vkd3d_shader_descriptor_info1 *descriptor) + { ++ bool raw = descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER; ++ enum vkd3d_shader_resource_type resource_type = descriptor->resource_type; + struct vkd3d_descriptor_variable_info var_info, counter_var_info = {0}; ++ bool is_uav = descriptor->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; ++ unsigned int structure_stride = descriptor->structure_stride / 4; + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + SpvStorageClass storage_class = SpvStorageClassUniformConstant; + uint32_t counter_type_id, type_id, var_id, counter_var_id = 0; + const struct vkd3d_spirv_resource_type *resource_type_info; ++ unsigned int sample_count = descriptor->sample_count; + enum vkd3d_shader_component_type sampled_type; + struct vkd3d_symbol resource_symbol; + struct vkd3d_shader_register reg; + + vsir_register_init(&reg, is_uav ? 
VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_FLOAT, 1); +- reg.idx[0].offset = register_id; ++ reg.idx[0].offset = descriptor->register_id; + + if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS && sample_count == 1) + resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D; +@@ -6489,7 +6492,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + return; + } + +- sampled_type = vkd3d_component_type_from_resource_data_type(resource_data_type); ++ sampled_type = vkd3d_component_type_from_resource_data_type(descriptor->resource_data_type); + + if (!is_uav && spirv_compiler_has_combined_sampler_for_resource(compiler, range)) + { +@@ -6520,16 +6523,12 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + resource_type_info, sampled_type, structure_stride || raw, 0); + } + +- var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, &reg, +- range, resource_type, is_uav, false, &var_info); ++ var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, ++ type_id, &reg, range, resource_type, descriptor, false, &var_info); + + if (is_uav) + { +- const struct vkd3d_shader_descriptor_info1 *d; +- +- d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); +- +- if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) ++ if (descriptor->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) + { + if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, +@@ -6543,7 +6542,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + compiler->use_invocation_interlock = true; + } + +- if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) ++ if (descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) + { + VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ + +@@ -6571,7 +6570,7 @@ static void 
spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp + } + + counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, +- type_id, &reg, range, resource_type, false, true, &counter_var_info); ++ type_id, &reg, range, resource_type, descriptor, true, &counter_var_info); + } + } + +@@ -10564,23 +10563,16 @@ static void spirv_compiler_emit_descriptor_declarations(struct spirv_compiler *c + switch (descriptor->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: +- spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor->register_id); ++ spirv_compiler_emit_sampler_declaration(compiler, &range, descriptor); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: +- spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor->register_id, descriptor->buffer_size); ++ spirv_compiler_emit_cbv_declaration(compiler, &range, descriptor); + break; + + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: +- spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, +- descriptor->sample_count, false, descriptor->resource_type, descriptor->resource_data_type, +- descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); +- break; +- + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: +- spirv_compiler_emit_resource_declaration(compiler, &range, descriptor->register_id, +- descriptor->sample_count, true, descriptor->resource_type, descriptor->resource_data_type, +- descriptor->structure_stride / 4, descriptor->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_RAW_BUFFER); ++ spirv_compiler_emit_resource_declaration(compiler, &range, descriptor); + break; + + default: +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index 84f641cc316..497a4c3b335 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -2782,8 +2782,8 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem + return false; + } + 
+-bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, +- bool output, D3D_NAME *usage) ++bool sysval_semantic_from_hlsl(enum vkd3d_shader_sysval_semantic *semantic, ++ struct hlsl_ctx *ctx, const struct hlsl_semantic *hlsl_semantic, bool output) + { + unsigned int i; + +@@ -2792,7 +2792,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + const char *name; + bool output; + enum vkd3d_shader_type shader_type; +- D3D_NAME usage; ++ enum vkd3d_shader_sysval_semantic semantic; + } + semantics[] = + { +@@ -2800,46 +2800,46 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u}, + +- {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, +- +- {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION}, +- {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID}, +- +- {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, +- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION}, +- {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_PRIMITIVE_ID}, +- {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_IS_FRONT_FACE}, +- {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, +- {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_VIEWPORT_ARRAY_INDEX}, +- +- {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, +- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, +- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, +- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 
D3D_NAME_DEPTH}, +- {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, +- +- {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, +- {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, +- {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_INSTANCE_ID}, +- +- {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, +- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION}, +- {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_RENDER_TARGET_ARRAY_INDEX}, +- {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VIEWPORT_ARRAY_INDEX}, ++ {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, ++ ++ {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_POSITION}, ++ {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SHADER_SV_PRIMITIVE_ID}, ++ ++ {"position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, ++ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_POSITION}, ++ {"sv_primitiveid", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_PRIMITIVE_ID}, ++ {"sv_isfrontface", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_IS_FRONT_FACE}, ++ {"sv_rendertargetarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, ++ {"sv_viewportarrayindex", false, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, ++ ++ {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, ++ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, ++ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_TARGET}, ++ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_DEPTH}, ++ {"sv_coverage", true, 
VKD3D_SHADER_TYPE_PIXEL, VKD3D_SHADER_SV_COVERAGE}, ++ ++ {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_NONE}, ++ {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VERTEX_ID}, ++ {"sv_instanceid", false, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_INSTANCE_ID}, ++ ++ {"position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, ++ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_POSITION}, ++ {"sv_rendertargetarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX}, ++ {"sv_viewportarrayindex", true, VKD3D_SHADER_TYPE_VERTEX, VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX}, + }; +- bool needs_compat_mapping = ascii_strncasecmp(semantic->name, "sv_", 3); ++ bool needs_compat_mapping = ascii_strncasecmp(hlsl_semantic->name, "sv_", 3); + + for (i = 0; i < ARRAY_SIZE(semantics); ++i) + { +- if (!ascii_strcasecmp(semantic->name, semantics[i].name) ++ if (!ascii_strcasecmp(hlsl_semantic->name, semantics[i].name) + && output == semantics[i].output + && (ctx->semantic_compat_mapping == needs_compat_mapping || !needs_compat_mapping) + && ctx->profile->type == semantics[i].shader_type) + { +- *usage = semantics[i].usage; ++ *semantic = semantics[i].semantic; + return true; + } + } +@@ -2847,7 +2847,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant + if (!needs_compat_mapping) + return false; + +- *usage = D3D_NAME_UNDEFINED; ++ *semantic = VKD3D_SHADER_SV_NONE; + return true; + } + +@@ -2880,16 +2880,16 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + unsigned int width = (1u << var->data_type->dimx) - 1, use_mask; ++ enum vkd3d_shader_sysval_semantic semantic; + uint32_t usage_idx, reg_idx; +- D3D_NAME usage; + bool has_idx; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + +- ret = 
hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); ++ ret = sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); + VKD3D_ASSERT(ret); +- if (usage == ~0u) ++ if (semantic == ~0u) + continue; + usage_idx = var->semantic.index; + +@@ -2908,12 +2908,12 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + use_mask = 0xf ^ use_mask; + + /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */ +- if (usage >= 64) +- usage = 0; ++ if (semantic >= VKD3D_SHADER_SV_TARGET) ++ semantic = VKD3D_SHADER_SV_NONE; + + put_u32(&buffer, 0); /* name */ + put_u32(&buffer, usage_idx); +- put_u32(&buffer, usage); ++ put_u32(&buffer, semantic); + switch (var->data_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: +@@ -2944,25 +2944,25 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, + i = 0; + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { +- const char *semantic = var->semantic.name; ++ enum vkd3d_shader_sysval_semantic semantic; ++ const char *name = var->semantic.name; + size_t string_offset; +- D3D_NAME usage; + + if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic)) + continue; + +- hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); +- if (usage == ~0u) ++ sysval_semantic_from_hlsl(&semantic, ctx, &var->semantic, output); ++ if (semantic == ~0u) + continue; + +- if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color")) ++ if (semantic == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color")) + string_offset = put_string(&buffer, "SV_Target"); +- else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth")) ++ else if (semantic == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth")) + string_offset = put_string(&buffer, "SV_Depth"); +- else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position")) ++ else if (semantic == VKD3D_SHADER_SV_POSITION && 
!ascii_strcasecmp(name, "position")) + string_offset = put_string(&buffer, "SV_Position"); + else +- string_offset = put_string(&buffer, semantic); ++ string_offset = put_string(&buffer, name); + set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset); + } + +@@ -4412,7 +4412,7 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + { + const struct hlsl_profile_info *profile = tpf->ctx->profile; + const bool output = var->is_output_semantic; +- D3D_NAME usage; ++ enum vkd3d_shader_sysval_semantic semantic; + bool has_idx; + + struct sm4_instruction instr = +@@ -4445,22 +4445,22 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + if (instr.dsts[0].reg.type == VKD3DSPR_DEPTHOUT) + instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR; + +- hlsl_sm4_usage_from_semantic(tpf->ctx, &var->semantic, output, &usage); +- if (usage == ~0u) +- usage = D3D_NAME_UNDEFINED; ++ sysval_semantic_from_hlsl(&semantic, tpf->ctx, &var->semantic, output); ++ if (semantic == ~0u) ++ semantic = VKD3D_SHADER_SV_NONE; + + if (var->is_input_semantic) + { +- switch (usage) ++ switch (semantic) + { +- case D3D_NAME_UNDEFINED: ++ case VKD3D_SHADER_SV_NONE: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT; + break; + +- case D3D_NAME_INSTANCE_ID: +- case D3D_NAME_PRIMITIVE_ID: +- case D3D_NAME_VERTEX_ID: ++ case VKD3D_SHADER_SV_INSTANCE_ID: ++ case VKD3D_SHADER_SV_PRIMITIVE_ID: ++ case VKD3D_SHADER_SV_VERTEX_ID: + instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL) + ? 
VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV; + break; +@@ -4510,25 +4510,25 @@ static void write_sm4_dcl_semantic(const struct tpf_writer *tpf, const struct hl + } + else + { +- if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL) ++ if (semantic == VKD3D_SHADER_SV_NONE || profile->type == VKD3D_SHADER_TYPE_PIXEL) + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT; + else + instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV; + } + +- switch (usage) ++ switch (semantic) + { +- case D3D_NAME_COVERAGE: +- case D3D_NAME_DEPTH: +- case D3D_NAME_DEPTH_GREATER_EQUAL: +- case D3D_NAME_DEPTH_LESS_EQUAL: +- case D3D_NAME_TARGET: +- case D3D_NAME_UNDEFINED: ++ case VKD3D_SHADER_SV_COVERAGE: ++ case VKD3D_SHADER_SV_DEPTH: ++ case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: ++ case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: ++ case VKD3D_SHADER_SV_TARGET: ++ case VKD3D_SHADER_SV_NONE: + break; + + default: + instr.idx_count = 1; +- instr.idx[0] = usage; ++ instr.idx[0] = semantic; + break; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 306c1ca0dd8..60be996ae24 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -23,6 +23,8 @@ + #include + #include + ++/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ ++ + static inline int char_to_int(char c) + { + if ('0' <= c && c <= '9') +@@ -443,20 +445,47 @@ void set_string(struct vkd3d_bytecode_buffer *buffer, size_t offset, const char + bytecode_set_bytes(buffer, offset, string, length); + } + +-static void vkd3d_shader_dump_blob(const char *path, const char *profile, +- const char *suffix, const void *data, size_t size) ++struct shader_dump_data ++{ ++ uint8_t checksum[16]; ++ const char *path; ++ const char *profile; ++ const char *source_suffix; ++ const char *target_suffix; ++}; ++ ++static void vkd3d_shader_dump_shader(const struct shader_dump_data *dump_data, ++ const void 
*data, size_t size, bool source) + { +- static unsigned int shader_id = 0; ++ static const char hexadecimal_digits[] = "0123456789abcdef"; ++ const uint8_t *checksum = dump_data->checksum; ++ char str_checksum[33]; ++ unsigned int pos = 0; + char filename[1024]; +- unsigned int id; ++ unsigned int i; + FILE *f; + +- id = vkd3d_atomic_increment_u32(&shader_id) - 1; ++ if (!dump_data->path) ++ return; + +- if (profile) +- snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u-%s.%s", path, id, profile, suffix); ++ for (i = 0; i < ARRAY_SIZE(dump_data->checksum); ++i) ++ { ++ str_checksum[2 * i] = hexadecimal_digits[checksum[i] >> 4]; ++ str_checksum[2 * i + 1] = hexadecimal_digits[checksum[i] & 0xf]; ++ } ++ str_checksum[32] = '\0'; ++ ++ pos = snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%s", dump_data->path, str_checksum); ++ ++ if (dump_data->profile) ++ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-%s", dump_data->profile); ++ ++ if (source) ++ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-source.%s", dump_data->source_suffix); + else +- snprintf(filename, ARRAY_SIZE(filename), "%s/vkd3d-shader-%u.%s", path, id, suffix); ++ pos += snprintf(filename + pos, ARRAY_SIZE(filename) - pos, "-target.%s", dump_data->target_suffix); ++ ++ TRACE("Dumping shader to \"%s\".\n", filename); + if ((f = fopen(filename, "wb"))) + { + if (fwrite(data, 1, size, f) != size) +@@ -488,37 +517,59 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t + } + } + +-void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info) ++static const char *shader_get_target_type_suffix(enum vkd3d_shader_target_type type) ++{ ++ switch (type) ++ { ++ case VKD3D_SHADER_TARGET_SPIRV_BINARY: ++ return "spv"; ++ case VKD3D_SHADER_TARGET_SPIRV_TEXT: ++ return "spv.s"; ++ case VKD3D_SHADER_TARGET_D3D_ASM: ++ return "d3d.s"; ++ case VKD3D_SHADER_TARGET_D3D_BYTECODE: ++ return "d3dbc"; ++ case 
VKD3D_SHADER_TARGET_DXBC_TPF: ++ return "dxbc"; ++ case VKD3D_SHADER_TARGET_GLSL: ++ return "glsl"; ++ case VKD3D_SHADER_TARGET_FX: ++ return "fx"; ++ default: ++ FIXME("Unhandled target type %#x.\n", type); ++ return "bin"; ++ } ++} ++ ++static void fill_shader_dump_data(const struct vkd3d_shader_compile_info *compile_info, ++ struct shader_dump_data *data) + { +- const struct vkd3d_shader_code *shader = &compile_info->source; +- const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; +- const struct hlsl_profile_info *profile; +- const char *profile_name = NULL; + static bool enabled = true; +- const char *path; ++ ++ data->path = NULL; + + if (!enabled) + return; + +- if (!(path = getenv("VKD3D_SHADER_DUMP_PATH"))) ++ if (!(data->path = getenv("VKD3D_SHADER_DUMP_PATH"))) + { + enabled = false; + return; + } + ++ data->profile = NULL; + if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) + { +- if (!(hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) +- return; +- +- if (!(profile = hlsl_get_target_info(hlsl_source_info->profile))) +- return; ++ const struct vkd3d_shader_hlsl_source_info *hlsl_source_info; + +- profile_name = profile->name; ++ if ((hlsl_source_info = vkd3d_find_struct(compile_info->next, HLSL_SOURCE_INFO))) ++ data->profile = hlsl_source_info->profile; + } + +- vkd3d_shader_dump_blob(path, profile_name, shader_get_source_type_suffix(compile_info->source_type), +- shader->code, shader->size); ++ vkd3d_compute_md5(compile_info->source.code, compile_info->source.size, ++ (uint32_t *)data->checksum, VKD3D_MD5_STANDARD); ++ data->source_suffix = shader_get_source_type_suffix(compile_info->source_type); ++ data->target_suffix = shader_get_target_type_suffix(compile_info->target_type); + } + + static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) +@@ -1497,6 +1548,7 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh + int vkd3d_shader_scan(const struct 
vkd3d_shader_compile_info *compile_info, char **messages) + { + struct vkd3d_shader_message_context message_context; ++ struct shader_dump_data dump_data; + int ret; + + TRACE("compile_info %p, messages %p.\n", compile_info, messages); +@@ -1511,7 +1563,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char + + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + +- vkd3d_shader_dump_shader(compile_info); ++ fill_shader_dump_data(compile_info, &dump_data); ++ vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); + + if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) + { +@@ -1620,6 +1673,7 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, char **messages) + { + struct vkd3d_shader_message_context message_context; ++ struct shader_dump_data dump_data; + int ret; + + TRACE("compile_info %p, out %p, messages %p.\n", compile_info, out, messages); +@@ -1634,7 +1688,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); + +- vkd3d_shader_dump_shader(compile_info); ++ fill_shader_dump_data(compile_info, &dump_data); ++ vkd3d_shader_dump_shader(&dump_data, compile_info->source.code, compile_info->source.size, true); + + if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) + { +@@ -1676,6 +1731,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, + } + } + ++ vkd3d_shader_dump_shader(&dump_data, out->code, out->size, false); ++ + vkd3d_shader_message_context_trace_messages(&message_context); + if (!vkd3d_shader_message_context_copy_messages(&message_context, messages)) + ret = VKD3D_ERROR_OUT_OF_MEMORY; +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index ef66a8ca07a..442885f53b4 
100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -1549,7 +1549,6 @@ void vkd3d_shader_warning(struct vkd3d_shader_message_context *context, const st + void vkd3d_shader_vwarning(struct vkd3d_shader_message_context *context, const struct vkd3d_shader_location *location, + enum vkd3d_shader_error error, const char *format, va_list args); + +-void vkd3d_shader_dump_shader(const struct vkd3d_shader_compile_info *compile_info); + uint64_t vkd3d_shader_init_config_flags(void); + void vkd3d_shader_trace_text_(const char *text, size_t size, const char *function); + #define vkd3d_shader_trace_text(text, size) \ +@@ -1580,7 +1579,13 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); + +-void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksum[4]); ++enum vkd3d_md5_variant ++{ ++ VKD3D_MD5_STANDARD, ++ VKD3D_MD5_DXBC, ++}; ++ ++void vkd3d_compute_md5(const void *dxbc, size_t size, uint32_t checksum[4], enum vkd3d_md5_variant variant); + + int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); +-- +2.45.2 + diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-f318e565f295d9f439e0e9ec52ba28835b3.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-f318e565f295d9f439e0e9ec52ba28835b3.patch deleted file mode 100644 index ea08de5c..00000000 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-f318e565f295d9f439e0e9ec52ba28835b3.patch +++ /dev/null @@ -1,18589 +0,0 @@ -From 4e98af49c317d0f0c54120036059749ad053e84d Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 7 Mar 2024 10:40:41 +1100 -Subject: [PATCH] Updated vkd3d to f318e565f295d9f439e0e9ec52ba28835b33a9ce. 
- ---- - libs/vkd3d/include/private/vkd3d_common.h | 76 +- - libs/vkd3d/include/private/vkd3d_memory.h | 3 +- - libs/vkd3d/include/vkd3d_shader.h | 243 +++- - libs/vkd3d/libs/vkd3d-common/blob.c | 1 + - libs/vkd3d/libs/vkd3d-common/debug.c | 14 +- - libs/vkd3d/libs/vkd3d-shader/checksum.c | 2 +- - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 29 +- - libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 718 ++++++---- - libs/vkd3d/libs/vkd3d-shader/dxbc.c | 6 +- - libs/vkd3d/libs/vkd3d-shader/dxil.c | 160 ++- - libs/vkd3d/libs/vkd3d-shader/fx.c | 1229 ++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/glsl.c | 198 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.c | 379 ++++- - libs/vkd3d/libs/vkd3d-shader/hlsl.h | 167 ++- - libs/vkd3d/libs/vkd3d-shader/hlsl.l | 136 +- - libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1259 +++++++++++++---- - libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1235 +++++++++++++--- - .../libs/vkd3d-shader/hlsl_constant_ops.c | 110 +- - libs/vkd3d/libs/vkd3d-shader/ir.c | 699 +++++++-- - libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- - libs/vkd3d/libs/vkd3d-shader/preproc.l | 5 +- - libs/vkd3d/libs/vkd3d-shader/preproc.y | 2 +- - libs/vkd3d/libs/vkd3d-shader/spirv.c | 629 ++++---- - libs/vkd3d/libs/vkd3d-shader/tpf.c | 625 +++++--- - .../libs/vkd3d-shader/vkd3d_shader_main.c | 42 +- - .../libs/vkd3d-shader/vkd3d_shader_private.h | 34 +- - libs/vkd3d/libs/vkd3d/cache.c | 9 +- - libs/vkd3d/libs/vkd3d/command.c | 595 ++++---- - libs/vkd3d/libs/vkd3d/device.c | 190 ++- - libs/vkd3d/libs/vkd3d/resource.c | 58 +- - libs/vkd3d/libs/vkd3d/state.c | 262 +++- - libs/vkd3d/libs/vkd3d/utils.c | 4 +- - libs/vkd3d/libs/vkd3d/vkd3d_main.c | 8 +- - libs/vkd3d/libs/vkd3d/vkd3d_private.h | 15 +- - 34 files changed, 6981 insertions(+), 2163 deletions(-) - -diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h -index a9d709d10fe..c62dc00415f 100644 ---- a/libs/vkd3d/include/private/vkd3d_common.h -+++ b/libs/vkd3d/include/private/vkd3d_common.h -@@ -46,12 
+46,22 @@ - - #define STATIC_ASSERT(e) extern void __VKD3D_STATIC_ASSERT__(int [(e) ? 1 : -1]) - -+#define VKD3D_ASSERT(cond) \ -+ do { \ -+ if (!(cond)) \ -+ ERR("Failed assertion: %s\n", #cond); \ -+ } while (0) -+ - #define MEMBER_SIZE(t, m) sizeof(((t *)0)->m) - - #define VKD3D_MAKE_TAG(ch0, ch1, ch2, ch3) \ - ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ - | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) - -+#define VKD3D_EXPAND(x) x -+#define VKD3D_STRINGIFY(x) #x -+#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) -+ - #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') - #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') - #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') -@@ -98,17 +108,11 @@ static inline uint64_t align(uint64_t addr, size_t alignment) - # define VKD3D_UNREACHABLE (void)0 - #endif /* __GNUC__ */ - --VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsigned int line) --{ -- fprintf(stderr, "%s:%u: Aborting, reached unreachable code.\n", filename, line); -- abort(); --} -- --#ifdef NDEBUG --#define vkd3d_unreachable() VKD3D_UNREACHABLE --#else --#define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) --#endif -+#define vkd3d_unreachable() \ -+ do { \ -+ ERR("%s:%u: Unreachable code reached.\n", __FILE__, __LINE__); \ -+ VKD3D_UNREACHABLE; \ -+ } while (0) - - #ifdef VKD3D_NO_TRACE_MESSAGES - #define TRACE(args...) do { } while (0) -@@ -118,11 +122,19 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig - #ifdef VKD3D_NO_DEBUG_MESSAGES - #define WARN(args...) do { } while (0) - #define FIXME(args...) do { } while (0) -+#define WARN_ON() (false) -+#define FIXME_ONCE(args...) do { } while (0) -+#endif -+ -+#ifdef VKD3D_NO_ERROR_MESSAGES -+#define ERR(args...) do { } while (0) -+#define MESSAGE(args...) 
do { } while (0) - #endif - - enum vkd3d_dbg_level - { - VKD3D_DBG_LEVEL_NONE, -+ VKD3D_DBG_LEVEL_MESSAGE, - VKD3D_DBG_LEVEL_ERR, - VKD3D_DBG_LEVEL_FIXME, - VKD3D_DBG_LEVEL_WARN, -@@ -143,7 +155,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - #define VKD3D_DBG_LOG(level) \ - do { \ - const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ -- VKD3D_DBG_PRINTF -+ VKD3D_DBG_PRINTF_##level - - #define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ - do { \ -@@ -151,24 +163,50 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ - ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ - vkd3d_dbg_next_time = true; \ -- VKD3D_DBG_PRINTF -+ VKD3D_DBG_PRINTF_##level - - #define VKD3D_DBG_PRINTF(...) \ - vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) - -+#define VKD3D_DBG_PRINTF_TRACE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#define VKD3D_DBG_PRINTF_WARN(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#define VKD3D_DBG_PRINTF_FIXME(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#define VKD3D_DBG_PRINTF_MESSAGE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+ -+#ifdef VKD3D_ABORT_ON_ERR -+#define VKD3D_DBG_PRINTF_ERR(...) \ -+ vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); \ -+ abort(); \ -+ } while (0) -+#else -+#define VKD3D_DBG_PRINTF_ERR(...) VKD3D_DBG_PRINTF(__VA_ARGS__) -+#endif -+ -+/* Used by vkd3d_unreachable(). */ -+#ifdef VKD3D_CROSSTEST -+#undef ERR -+#define ERR(...) 
do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) -+#endif -+ - #ifndef TRACE --#define TRACE VKD3D_DBG_LOG(TRACE) -+#define TRACE VKD3D_DBG_LOG(TRACE) - #endif - - #ifndef WARN --#define WARN VKD3D_DBG_LOG(WARN) -+#define WARN VKD3D_DBG_LOG(WARN) - #endif - - #ifndef FIXME --#define FIXME VKD3D_DBG_LOG(FIXME) -+#define FIXME VKD3D_DBG_LOG(FIXME) - #endif - --#define ERR VKD3D_DBG_LOG(ERR) -+#ifndef ERR -+#define ERR VKD3D_DBG_LOG(ERR) -+#endif -+ -+#ifndef MESSAGE -+#define MESSAGE VKD3D_DBG_LOG(MESSAGE) -+#endif - - #ifndef TRACE_ON - #define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) -@@ -178,7 +216,9 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); - #define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) - #endif - -+#ifndef FIXME_ONCE - #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) -+#endif - - #define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name - -@@ -233,7 +273,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) - { - #ifdef _MSC_VER - return __popcnt(v); --#elif defined(__MINGW32__) -+#elif defined(HAVE_BUILTIN_POPCOUNT) - return __builtin_popcount(v); - #else - v -= (v >> 1) & 0x55555555; -diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h -index 682d35c03c6..b157fc07cb7 100644 ---- a/libs/vkd3d/include/private/vkd3d_memory.h -+++ b/libs/vkd3d/include/private/vkd3d_memory.h -@@ -19,7 +19,6 @@ - #ifndef __VKD3D_MEMORY_H - #define __VKD3D_MEMORY_H - --#include - #include - #include - #include -@@ -44,7 +43,7 @@ static inline void *vkd3d_realloc(void *ptr, size_t size) - static inline void *vkd3d_calloc(size_t count, size_t size) - { - void *ptr; -- assert(count <= ~(size_t)0 / size); -+ VKD3D_ASSERT(!size || count <= ~(size_t)0 / size); - if (!(ptr = calloc(count, size))) - ERR("Out of memory.\n"); - return ptr; -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index 
d3afcc11b16..d37d8ebad9e 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -105,6 +105,11 @@ enum vkd3d_shader_structure_type - * \since 1.10 - */ - VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO, -+ /** -+ * The structure is a vkd3d_shader_parameter_info structure. -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), - }; -@@ -453,44 +458,191 @@ enum vkd3d_shader_binding_flag - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), - }; - -+/** -+ * The manner in which a parameter value is provided to the shader, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - enum vkd3d_shader_parameter_type - { - VKD3D_SHADER_PARAMETER_TYPE_UNKNOWN, -+ /** The parameter value is embedded directly in the shader. */ - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, -+ /** -+ * The parameter value is provided to the shader via a specialization -+ * constant. This value is only supported for the SPIR-V target type. -+ */ - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, -+ /** -+ * The parameter value is provided to the shader as part of a uniform -+ * buffer. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_TYPE_BUFFER, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_TYPE), - }; - -+/** -+ * The format of data provided to the shader, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - enum vkd3d_shader_parameter_data_type - { - VKD3D_SHADER_PARAMETER_DATA_TYPE_UNKNOWN, -+ /** The parameter is provided as a 32-bit unsigned integer. */ - VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, -+ /** The parameter is provided as a 32-bit float. 
\since 1.13 */ -+ VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), - }; - -+/** -+ * Names a specific shader parameter, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - enum vkd3d_shader_parameter_name - { - VKD3D_SHADER_PARAMETER_NAME_UNKNOWN, -+ /** -+ * The sample count of the framebuffer, as returned by the HLSL function -+ * GetRenderTargetSampleCount() or the GLSL builtin gl_NumSamples. -+ * -+ * This parameter should be specified when compiling to SPIR-V, which -+ * provides no builtin ability to query this information from the shader. -+ * -+ * The default value is 1. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ */ - VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, -+ /** -+ * Alpha test comparison function. When this parameter is provided, if the -+ * alpha component of the pixel shader colour output at location 0 fails the -+ * test, as defined by this function and the reference value provided by -+ * VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, the fragment will be -+ * discarded. -+ * -+ * This parameter, along with VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, -+ * can be used to implement fixed function alpha test, as present in -+ * Direct3D versions up to 9, if the target environment does not support -+ * alpha test as part of its own fixed-function API (as Vulkan and core -+ * OpenGL). -+ * -+ * The default value is VKD3D_SHADER_COMPARISON_FUNC_ALWAYS. -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. The value specified must be -+ * a member of enum vkd3d_shader_comparison_func. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC, -+ /** -+ * Alpha test reference value. 
-+ * See VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC for documentation of -+ * alpha test. -+ * -+ * The default value is zero. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, -+ /** -+ * Whether to use flat interpolation for fragment shader colour inputs. -+ * If the value is nonzero, inputs whose semantic usage is COLOR will use -+ * flat interpolation instead of linear. -+ * This parameter is ignored if the shader model is 4 or greater, since only -+ * shader model 3 and below do not specify the interpolation mode in the -+ * shader bytecode. -+ * -+ * This parameter can be used to implement fixed function shade mode, as -+ * present in Direct3D versions up to 9, if the target environment does not -+ * support shade mode as part of its own fixed-function API (as Vulkan and -+ * core OpenGL). -+ * -+ * The data type for this parameter must be -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ * -+ * The default value is zero, i.e. use linear interpolation. -+ * -+ * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this -+ * version of vkd3d-shader. -+ * -+ * \since 1.13 -+ */ -+ VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, - - VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), - }; - -+/** -+ * The value of an immediate constant parameter, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - struct vkd3d_shader_parameter_immediate_constant - { - union - { -+ /** -+ * The value if the parameter's data type is -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. -+ */ - uint32_t u32; -+ /** -+ * The value if the parameter's data type is -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ * -+ * \since 1.13 -+ */ -+ float f32; - } u; - }; - -+/** -+ * The linkage of a specialization constant parameter, used in -+ * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ */ - struct vkd3d_shader_parameter_specialization_constant - { -+ /** The ID of the specialization constant. 
*/ - uint32_t id; - }; - -+/** -+ * The linkage of a parameter specified through a uniform buffer, used in -+ * struct vkd3d_shader_parameter1. -+ */ -+struct vkd3d_shader_parameter_buffer -+{ -+ /** -+ * The set of the uniform buffer descriptor. If the target environment does -+ * not support descriptor sets, this value must be set to 0. -+ */ -+ unsigned int set; -+ /** The binding index of the uniform buffer descriptor. */ -+ unsigned int binding; -+ /** The byte offset of the parameter within the buffer. */ -+ uint32_t offset; -+}; -+ -+/** -+ * An individual shader parameter. -+ * -+ * This structure is an earlier version of struct vkd3d_shader_parameter1 -+ * which supports fewer parameter types; -+ * refer to that structure for usage information. -+ * -+ * Only the following types may be used with this structure: -+ * -+ * - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT -+ * - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT -+ */ - struct vkd3d_shader_parameter - { - enum vkd3d_shader_parameter_name name; -@@ -503,6 +655,56 @@ struct vkd3d_shader_parameter - } u; - }; - -+/** -+ * An individual shader parameter. -+ * -+ * This structure is used in struct vkd3d_shader_parameter_info; see there for -+ * explanation of shader parameters. -+ * -+ * For example, to specify the rasterizer sample count to the shader via an -+ * unsigned integer specialization constant with ID 3, -+ * set the following members: -+ * -+ * - \a name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT -+ * - \a type = VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT -+ * - \a data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32 -+ * - \a u.specialization_constant.id = 3 -+ * -+ * This structure is an extended version of struct vkd3d_shader_parameter. -+ */ -+struct vkd3d_shader_parameter1 -+{ -+ /** The builtin parameter to be mapped. */ -+ enum vkd3d_shader_parameter_name name; -+ /** How the parameter will be provided to the shader. 
*/ -+ enum vkd3d_shader_parameter_type type; -+ /** -+ * The data type of the supplied parameter, which determines how it is to -+ * be interpreted. -+ */ -+ enum vkd3d_shader_parameter_data_type data_type; -+ union -+ { -+ /** -+ * Additional information if \a type is -+ * VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT. -+ */ -+ struct vkd3d_shader_parameter_immediate_constant immediate_constant; -+ /** -+ * Additional information if \a type is -+ * VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT. -+ */ -+ struct vkd3d_shader_parameter_specialization_constant specialization_constant; -+ /** -+ * Additional information if \a type is -+ * VKD3D_SHADER_PARAMETER_TYPE_BUFFER. -+ */ -+ struct vkd3d_shader_parameter_buffer buffer; -+ void *_pointer_pad; -+ uint32_t _pad[4]; -+ } u; -+}; -+ - /** - * Symbolic register indices for mapping uniform constant register sets in - * legacy Direct3D bytecode to constant buffer views in the target environment. -@@ -1674,7 +1876,7 @@ enum vkd3d_shader_sysval_semantic - VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX = 0x05, - /** Vertex ID; SV_VertexID in Direct3D. */ - VKD3D_SHADER_SV_VERTEX_ID = 0x06, -- /** Primtive ID; SV_PrimitiveID in Direct3D. */ -+ /** Primitive ID; SV_PrimitiveID in Direct3D. */ - VKD3D_SHADER_SV_PRIMITIVE_ID = 0x07, - /** Instance ID; SV_InstanceID in Direct3D. */ - VKD3D_SHADER_SV_INSTANCE_ID = 0x08, -@@ -1994,6 +2196,44 @@ struct vkd3d_shader_varying_map_info - unsigned int varying_count; - }; - -+/** -+ * Interface information regarding a builtin shader parameter. -+ * -+ * Like compile options specified with struct vkd3d_shader_compile_option, -+ * parameters are used to specify certain values which are not part of the -+ * source shader bytecode but which need to be specified in the shader bytecode -+ * in the target format. 
-+ * Unlike struct vkd3d_shader_compile_option, however, this structure allows -+ * parameters to be specified in a variety of different ways, as described by -+ * enum vkd3d_shader_parameter_type. -+ * -+ * This structure is an extended version of struct vkd3d_shader_parameter as -+ * used in struct vkd3d_shader_spirv_target_info, which allows more parameter -+ * types to be used, and also allows specifying parameters when compiling -+ * shaders to target types other than SPIR-V. If this structure is chained -+ * along with vkd3d_shader_spirv_target_info, any parameters specified in the -+ * latter structure are ignored. -+ * -+ * This structure is passed to vkd3d_shader_compile() and extends -+ * vkd3d_shader_compile_info. -+ * -+ * This structure contains only input parameters. -+ * -+ * \since 1.13 -+ */ -+struct vkd3d_shader_parameter_info -+{ -+ /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO. */ -+ enum vkd3d_shader_structure_type type; -+ /** Optional pointer to a structure containing further parameters. */ -+ const void *next; -+ -+ /** Pointer to an array of dynamic parameters for this shader instance. */ -+ const struct vkd3d_shader_parameter1 *parameters; -+ /** Size, in elements, of \ref parameters. 
*/ -+ unsigned int parameter_count; -+}; -+ - #ifdef LIBVKD3D_SHADER_SOURCE - # define VKD3D_SHADER_API VKD3D_EXPORT - #else -@@ -2077,6 +2317,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported - * - vkd3d_shader_descriptor_offset_info - * - vkd3d_shader_hlsl_source_info - * - vkd3d_shader_interface_info -+ * - vkd3d_shader_parameter_info - * - vkd3d_shader_preprocess_info - * - vkd3d_shader_scan_combined_resource_sampler_info - * - vkd3d_shader_scan_descriptor_info -diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c -index f60ef7db769..c2c6ad67804 100644 ---- a/libs/vkd3d/libs/vkd3d-common/blob.c -+++ b/libs/vkd3d/libs/vkd3d-common/blob.c -@@ -20,6 +20,7 @@ - #define WIDL_C_INLINE_WRAPPERS - #endif - #define COBJMACROS -+ - #define CONST_VTABLE - #include "vkd3d.h" - #include "vkd3d_blob.h" -diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c -index 4523fc997ef..9a92f0ead02 100644 ---- a/libs/vkd3d/libs/vkd3d-common/debug.c -+++ b/libs/vkd3d/libs/vkd3d-common/debug.c -@@ -22,7 +22,6 @@ - - #include "vkd3d_common.h" - --#include - #include - #include - #include -@@ -45,11 +44,12 @@ extern const char *const vkd3d_dbg_env_name; - - static const char *const debug_level_names[] = - { -- [VKD3D_DBG_LEVEL_NONE ] = "none", -- [VKD3D_DBG_LEVEL_ERR ] = "err", -- [VKD3D_DBG_LEVEL_FIXME] = "fixme", -- [VKD3D_DBG_LEVEL_WARN ] = "warn", -- [VKD3D_DBG_LEVEL_TRACE] = "trace", -+ [VKD3D_DBG_LEVEL_NONE ] = "none", -+ [VKD3D_DBG_LEVEL_MESSAGE] = "message", -+ [VKD3D_DBG_LEVEL_ERR ] = "err", -+ [VKD3D_DBG_LEVEL_FIXME] = "fixme", -+ [VKD3D_DBG_LEVEL_WARN ] = "warn", -+ [VKD3D_DBG_LEVEL_TRACE] = "trace", - }; - - enum vkd3d_dbg_level vkd3d_dbg_get_level(void) -@@ -104,8 +104,6 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch - if (vkd3d_dbg_get_level() < level) - return; - -- assert(level < ARRAY_SIZE(debug_level_names)); -- - #ifdef _WIN32 - 
vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); - #elif HAVE_GETTID -diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c -index 0910729a0e9..d9560628c77 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/checksum.c -+++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c -@@ -288,7 +288,7 @@ void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksu - const uint8_t *ptr = dxbc; - struct md5_ctx ctx; - -- assert(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); -+ VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); - ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; - size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; - -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -index 9abc2c4db70..77e9711300f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c -@@ -254,6 +254,10 @@ static const char * const shader_opcode_names[] = - [VKD3DSIH_PHASE ] = "phase", - [VKD3DSIH_PHI ] = "phi", - [VKD3DSIH_POW ] = "pow", -+ [VKD3DSIH_QUAD_READ_ACROSS_D ] = "quad_read_across_d", -+ [VKD3DSIH_QUAD_READ_ACROSS_X ] = "quad_read_across_x", -+ [VKD3DSIH_QUAD_READ_ACROSS_Y ] = "quad_read_across_y", -+ [VKD3DSIH_QUAD_READ_LANE_AT ] = "quad_read_lane_at", - [VKD3DSIH_RCP ] = "rcp", - [VKD3DSIH_REP ] = "rep", - [VKD3DSIH_RESINFO ] = "resinfo", -@@ -1199,7 +1203,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const - { - bool untyped = false; - -- switch (compiler->current->handler_idx) -+ switch (compiler->current->opcode) - { - case VKD3DSIH_MOV: - case VKD3DSIH_MOVC: -@@ -1755,7 +1759,7 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - { - struct vkd3d_string_buffer *buffer = &compiler->buffer; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_BREAKP: - case VKD3DSIH_CONTINUEP: -@@ -1853,8 +1857,13 @@ static void 
shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile - break; - - case VKD3DSIH_TEX: -- if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) -- vkd3d_string_buffer_printf(buffer, "p"); -+ if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0)) -+ { -+ if (ins->flags & VKD3DSI_TEXLD_PROJECT) -+ vkd3d_string_buffer_printf(buffer, "p"); -+ else if (ins->flags & VKD3DSI_TEXLD_BIAS) -+ vkd3d_string_buffer_printf(buffer, "b"); -+ } - break; - - case VKD3DSIH_WAVE_OP_ADD: -@@ -1910,7 +1919,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, - } - else - { -- assert(icb->component_count == VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(icb->component_count == VKD3D_VEC4_SIZE); - for (i = 0; i < icb->element_count; ++i) - { - shader_print_hex_literal(compiler, " {", icb->data[4 * i + 0], ""); -@@ -1937,9 +1946,9 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, - if (ins->coissue) - vkd3d_string_buffer_printf(buffer, "+"); - -- shader_print_opcode(compiler, ins->handler_idx); -+ shader_print_opcode(compiler, ins->opcode); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL: - case VKD3DSIH_DCL_UAV_TYPED: -@@ -2242,7 +2251,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic - case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; - case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; - /* SV_Coverage has name vCoverage when used as an input, -- * but it doens't appear in the signature in that case. */ -+ * but it doesn't appear in the signature in that case. 
*/ - case VKD3D_SHADER_SV_COVERAGE: return "oMask"; - case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; - default: return "??"; -@@ -2430,7 +2439,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_ENDIF: -@@ -2459,7 +2468,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, - - shader_dump_instruction(&compiler, ins); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_ELSE: - case VKD3DSIH_IF: -diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -index bfd5b52b436..d05394c3ab7 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c -@@ -757,7 +757,7 @@ static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, - { - /* d3d shaders have a maximum of 8192 constants; we should not overrun - * this array. 
*/ -- assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); -+ VKD3D_ASSERT((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); - bitmap_set(sm1->constants[set].def_mask, index); - } - } -@@ -1060,7 +1060,7 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) - - static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) - { -- if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) -+ if ((ins->opcode == VKD3DSIH_BREAKP || ins->opcode == VKD3DSIH_IF) && ins->flags) - { - vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, - "Ignoring unexpected instruction flags %#x.", ins->flags); -@@ -1142,23 +1142,23 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str - goto fail; - } - -- if (ins->handler_idx == VKD3DSIH_DCL) -+ if (ins->opcode == VKD3DSIH_DCL) - { - shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); - } -- else if (ins->handler_idx == VKD3DSIH_DEF) -+ else if (ins->opcode == VKD3DSIH_DEF) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_FLOAT); - shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } -- else if (ins->handler_idx == VKD3DSIH_DEFB) -+ else if (ins->opcode == VKD3DSIH_DEFB) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_SCALAR, VKD3D_DATA_UINT); - shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); - } -- else if (ins->handler_idx == VKD3DSIH_DEFI) -+ else if (ins->opcode == VKD3DSIH_DEFI) - { - shader_sm1_read_dst_param(sm1, &p, dst_param); - shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_INT); -@@ -1195,7 +1195,7 @@ static void shader_sm1_read_instruction(struct 
vkd3d_shader_sm1_parser *sm1, str - return; - - fail: -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - *ptr = sm1->end; - } - -@@ -1272,7 +1272,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st - sm1->end = &code[token_count]; - - /* Estimate instruction count to avoid reallocation in most shaders. */ -- if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) -+ if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) - return VKD3D_ERROR_OUT_OF_MEMORY; - - vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); -@@ -1326,7 +1326,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - ins = &instructions->elements[instructions->count]; - shader_sm1_read_instruction(&sm1, ins); - -- if (ins->handler_idx == VKD3DSIH_INVALID) -+ if (ins->opcode == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - vsir_program_cleanup(program); -@@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c - return ret; - } - --bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) -+bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) - { - unsigned int i; - -@@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - bool output; - enum vkd3d_shader_type shader_type; - unsigned int major_version; -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - unsigned int offset; - } - register_table[] = - { -- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, 
D3DSPR_INPUT}, -- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, -- {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, -- {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, -- -- {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, -- {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, -- {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, -- {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, -- -- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, -- -- {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, -- {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, -- {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -- {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, -- {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, 
VKD3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, -+ {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, -+ {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, -+ {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, -+ {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, -+ {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, -+ {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, -+ {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, -+ -+ {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, -+ {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, -+ {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, -+ {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, -+ {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, - 
}; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) - { -- if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) -+ if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) - && output == register_table[i].output -- && ctx->profile->type == register_table[i].shader_type -- && ctx->profile->major_version == register_table[i].major_version) -+ && version->type == register_table[i].shader_type -+ && version->major == register_table[i].major_version) - { - *type = register_table[i].type; -- if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) -+ if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) - *reg = register_table[i].offset; - else -- *reg = semantic->index; -+ *reg = semantic_index; - return true; - } - } -@@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - return false; - } - --bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) -+bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -+ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) - { - static const struct - { -@@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU - - for (i = 0; i < ARRAY_SIZE(semantics); ++i) - { -- if (!ascii_strcasecmp(semantic->name, semantics[i].name)) -+ if (!ascii_strcasecmp(semantic_name, semantics[i].name)) - { - *usage = semantics[i].usage; -- *usage_idx = semantic->index; -+ *usage_idx = semantic_index; - return true; - } - } -@@ -1465,6 +1466,17 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU - return false; - } - -+struct d3dbc_compiler -+{ -+ struct vsir_program *program; -+ struct vkd3d_bytecode_buffer buffer; -+ struct vkd3d_shader_message_context *message_context; -+ -+ /* OBJECTIVE: Store all the required information in the other fields so -+ * 
that this hlsl_ctx is no longer necessary. */ -+ struct hlsl_ctx *ctx; -+}; -+ - static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) - { - if (type == VKD3D_SHADER_TYPE_VERTEX) -@@ -1480,7 +1492,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return hlsl_sm1_class(type->e.array.type); - case HLSL_CLASS_MATRIX: -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3DXPC_MATRIX_COLUMNS; - else -@@ -1497,13 +1509,22 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPC_OBJECT; -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - break; - } - -@@ -1593,13 +1614,22 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) - case HLSL_CLASS_VERTEX_SHADER: - return D3DXPT_VERTEXSHADER; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - 
break; - } - -@@ -1677,8 +1707,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) - list_move_tail(&ctx->extern_vars, &sorted); - } - --static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- struct hlsl_ir_function_decl *entry_func) -+void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) - { - size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; - unsigned int uniform_count = 0; -@@ -1739,11 +1768,11 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - } - else - { -- put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); -+ put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); - put_u32(buffer, var->bind_count[r]); - } - put_u32(buffer, 0); /* type */ -- put_u32(buffer, 0); /* FIXME: default value */ -+ put_u32(buffer, 0); /* default value */ - } - } - -@@ -1767,6 +1796,62 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - - write_sm1_type(buffer, var->data_type, ctab_start); - set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); -+ -+ if (var->default_values) -+ { -+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int comp_count = hlsl_type_component_count(var->data_type); -+ unsigned int default_value_offset; -+ unsigned int k; -+ -+ default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); -+ set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); -+ -+ for (k = 0; k < comp_count; ++k) -+ { -+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ unsigned int comp_offset; -+ enum hlsl_regset regset; -+ -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ if (regset == HLSL_REGSET_NUMERIC) -+ { -+ union -+ { -+ uint32_t u; -+ float f; -+ } uni; -+ -+ switch 
(comp_type->e.numeric.type) -+ { -+ case HLSL_TYPE_DOUBLE: -+ hlsl_fixme(ctx, &var->loc, "Write double default values."); -+ uni.u = 0; -+ break; -+ -+ case HLSL_TYPE_INT: -+ uni.f = var->default_values[k].number.i; -+ break; -+ -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ uni.f = var->default_values[k].number.u; -+ break; -+ -+ case HLSL_TYPE_HALF: -+ case HLSL_TYPE_FLOAT: -+ uni.u = var->default_values[k].number.u; -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); -+ } -+ } -+ } -+ - ++uniform_count; - } - } -@@ -1778,7 +1863,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); - } - --static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) -+static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) - { - return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) - | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); -@@ -1791,7 +1876,7 @@ struct sm1_instruction - - struct sm1_dst_register - { -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_DSTMOD_TYPE mod; - unsigned int writemask; - uint32_t reg; -@@ -1799,19 +1884,45 @@ struct sm1_instruction - - struct sm1_src_register - { -- D3DSHADER_PARAM_REGISTER_TYPE type; -+ enum vkd3d_shader_register_type type; - D3DSHADER_PARAM_SRCMOD_TYPE mod; - unsigned int swizzle; - uint32_t reg; -- } srcs[3]; -+ } srcs[4]; - unsigned int src_count; - - unsigned int has_dst; - }; - -+static bool is_inconsequential_instr(const struct sm1_instruction *instr) -+{ -+ const struct sm1_src_register *src = &instr->srcs[0]; -+ const struct sm1_dst_register *dst = &instr->dst; -+ unsigned int i; -+ -+ if (instr->opcode != D3DSIO_MOV) -+ return false; -+ if (dst->mod != D3DSPDM_NONE) -+ return false; -+ if 
(src->mod != D3DSPSM_NONE) -+ return false; -+ if (src->type != dst->type) -+ return false; -+ if (src->reg != dst->reg) -+ return false; -+ -+ for (i = 0; i < 4; ++i) -+ { -+ if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) -+ return false; -+ } -+ -+ return true; -+} -+ - static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) - { -- assert(reg->writemask); -+ VKD3D_ASSERT(reg->writemask); - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); - } - -@@ -1821,15 +1932,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, - put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); - } - --static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct sm1_instruction *instr) -+static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - uint32_t token = instr->opcode; - unsigned int i; - -+ if (is_inconsequential_instr(instr)) -+ return; -+ - token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); - -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -1845,54 +1960,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w - src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); - } - --static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, -- const struct hlsl_reg *src3) -+static void 
d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) - { - struct sm1_instruction instr = - { - .opcode = D3DSIO_DP2ADD, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, -- .srcs[2].type = D3DSPR_TEMP, -+ .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, - }; - -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, -- const struct hlsl_reg *src2, const struct hlsl_reg *src3) -+static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, -+ D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -+ const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, -- .srcs[2].type = 
D3DSPR_TEMP, -+ .srcs[2].type = VKD3DSPR_TEMP, - .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), - .srcs[2].reg = src3->id, - .src_count = 3, -@@ -1901,26 +2015,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, -@@ -1928,49 +2041,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- 
const struct hlsl_reg *src1, const struct hlsl_reg *src2) -+static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), - .srcs[0].reg = src1->id, -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), - .srcs[1].reg = src2->id, - .src_count = 2, - }; - -- write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, -- const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) -+static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, -+ const struct hlsl_reg *dst, const struct hlsl_reg *src, -+ D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) - { - struct sm1_instruction instr = - { - .opcode = opcode, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.mod = dst_mod, - .dst.writemask = dst->writemask, - .dst.reg = dst->id, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), - .srcs[0].reg = src->id, - .srcs[0].mod = src_mod, -@@ -1978,19 +2090,19 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe - }; - - sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); -- 
write_sm1_instruction(ctx, buffer, &instr); -+ d3dbc_write_instruction(d3dbc, &instr); - } - --static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - const struct hlsl_ir_node *arg1 = expr->operands[0].node; - const struct hlsl_type *dst_type = expr->node.data_type; - const struct hlsl_type *src_type = arg1->data_type; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - - /* Narrowing casts were already lowered. */ -- assert(src_type->dimx == dst_type->dimx); -+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); - - switch (dst_type->e.numeric.type) - { -@@ -2004,7 +2116,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - /* Integrals are internally represented as floats, so no change is necessary.*/ - case HLSL_TYPE_HALF: - case HLSL_TYPE_FLOAT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_DOUBLE: -@@ -2028,7 +2140,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - break; - case HLSL_TYPE_INT: - case HLSL_TYPE_UINT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_TYPE_BOOL: -@@ -2057,8 +2169,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - --static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; -+ struct hlsl_ctx *ctx = d3dbc->ctx; 
- unsigned int i, x; - - for (i = 0; i < ctx->constant_defs.count; ++i) -@@ -2067,12 +2182,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - uint32_t token = D3DSIO_DEF; - const struct sm1_dst_register reg = - { -- .type = D3DSPR_CONST, -+ .type = VKD3DSPR_CONST, - .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = constant_reg->index, - }; - -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 5 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2082,32 +2197,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - } - } - --static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_var *var, bool output) -+static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, -+ const struct signature_element *element, bool output) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, usage_idx; - D3DDECLUSAGE usage; - bool ret; - -- if ((!output && !var->last_read) || (output && !var->first_write)) -- return; -- -- if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) -+ if (hlsl_sm1_register_from_semantic(version, element->semantic_name, -+ element->semantic_index, output, ®.type, ®.reg)) - { - usage = 0; - usage_idx = 0; - } - else - { -- ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); -- assert(ret); -- reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; -- reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; -+ ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); -+ VKD3D_ASSERT(ret); -+ reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; -+ reg.reg = element->register_index; - } - - token = D3DSIO_DCL; -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2116,39 +2231,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; - put_u32(buffer, token); - -- reg.writemask = (1 << var->data_type->dimx) - 1; -+ reg.writemask = element->mask; - write_sm1_dst_register(buffer, ®); - } - --static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) - { -+ struct vsir_program *program = d3dbc->program; -+ const struct vkd3d_shader_version *version; - bool write_in = false, write_out = false; -- struct hlsl_ir_var *var; - -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) -+ version = &program->shader_version; -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) - write_in = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) - write_in = write_out = true; -- else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) -+ else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) - write_in = true; - -- LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ if (write_in) - { -- if (write_in && var->is_input_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, false); -- if (write_out && var->is_output_semantic) -- write_sm1_semantic_dcl(ctx, buffer, var, true); -+ for (unsigned int i = 0; i < program->input_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); -+ } -+ -+ if (write_out) -+ { -+ 
for (unsigned int i = 0; i < program->output_signature.element_count; ++i) -+ d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); - } - } - --static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, - unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; - struct sm1_dst_register reg = {0}; - uint32_t token, res_type = 0; - - token = D3DSIO_DCL; -- if (ctx->profile->major_version > 1) -+ if (version->major > 1) - token |= 2 << D3DSI_INSTLENGTH_SHIFT; - put_u32(buffer, token); - -@@ -2175,20 +2298,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu - token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; - put_u32(buffer, token); - -- reg.type = D3DSPR_SAMPLER; -+ reg.type = VKD3DSPR_COMBINED_SAMPLER; - reg.writemask = VKD3DSP_WRITEMASK_ALL; - reg.reg = reg_id; - - write_sm1_dst_register(buffer, ®); - } - --static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) -+static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - enum hlsl_sampler_dim sampler_dim; - unsigned int i, count, reg_id; - struct hlsl_ir_var *var; - -- if (ctx->profile->major_version < 2) -+ if (version->major < 2) - return; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -@@ -2210,39 +2335,38 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b - continue; - } - -- reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; -- write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); -+ reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; -+ d3dbc_write_sampler_dcl(d3dbc, 
reg_id, sampler_dim); - } - } - } - } - --static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_CONST, -+ .srcs[0].type = VKD3DSPR_CONST, - .srcs[0].reg = constant->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), - .src_count = 1, - }; - -- assert(instr->reg.allocated); -- assert(constant->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); -+ VKD3D_ASSERT(constant->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -+static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, - const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) - { - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); -@@ -2255,28 +2379,69 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ - - src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); - dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); -- write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); -+ d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); - } - } - --static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, -+ const struct 
hlsl_reg *dst, const struct hlsl_reg *src) - { -+ struct sm1_instruction instr = -+ { -+ .opcode = D3DSIO_SINCOS, -+ -+ .dst.type = VKD3DSPR_TEMP, -+ .dst.writemask = dst->writemask, -+ .dst.reg = dst->id, -+ .has_dst = 1, -+ -+ .srcs[0].type = VKD3DSPR_TEMP, -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), -+ .srcs[0].reg = src->id, -+ .src_count = 1, -+ }; -+ -+ if (op == HLSL_OP1_COS_REDUCED) -+ VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); -+ else /* HLSL_OP1_SIN_REDUCED */ -+ VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); -+ -+ if (d3dbc->ctx->profile->major_version < 3) -+ { -+ instr.src_count = 3; -+ -+ instr.srcs[1].type = VKD3DSPR_CONST; -+ instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); -+ instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; -+ -+ instr.srcs[2].type = VKD3DSPR_CONST; -+ instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); -+ instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; -+ } -+ -+ d3dbc_write_instruction(d3dbc, &instr); -+} -+ -+static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) -+{ -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); - struct hlsl_ir_node *arg1 = expr->operands[0].node; - struct hlsl_ir_node *arg2 = expr->operands[1].node; - struct hlsl_ir_node *arg3 = expr->operands[2].node; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - - if (expr->op == HLSL_OP1_REINTERPRET) - { -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); - return; - } - - if (expr->op == HLSL_OP1_CAST) - { -- write_sm1_cast(ctx, buffer, instr); -+ d3dbc_write_cast(d3dbc, instr); - return; - } - -@@ -2290,70 +2455,75 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *b - switch (expr->op) - { - case HLSL_OP1_ABS: -- write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSX: -- write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_DSY: -- write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); - break; - - case HLSL_OP1_EXP2: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); - break; - - case HLSL_OP1_LOG2: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); - break; - - case HLSL_OP1_NEG: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); - break; - - case HLSL_OP1_SAT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); - break; - - case HLSL_OP1_RCP: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); - break; - - case HLSL_OP1_RSQ: -- write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); -+ d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); -+ break; -+ -+ case HLSL_OP1_COS_REDUCED: -+ case HLSL_OP1_SIN_REDUCED: -+ d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); - break; - - case HLSL_OP2_ADD: -- write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, 
&arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MAX: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MIN: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_MUL: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP1_FRACT: -- write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); -+ d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); - break; - - case HLSL_OP2_DOT: - switch (arg1->data_type->dimx) - { - case 4: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case 3: -- write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); - break; - - default: -@@ -2362,27 +2532,31 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - break; - - case HLSL_OP2_LOGIC_AND: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_LOGIC_OR: -- write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP2_SLT: -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL) - hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for 
pixel shaders."); -- write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); -+ d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); - break; - - case HLSL_OP3_CMP: -- if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) -+ if (version->type == VKD3D_SHADER_TYPE_VERTEX) - hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); -- write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - case HLSL_OP3_DP2ADD: -- write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); -+ break; -+ -+ case HLSL_OP3_MAD: -+ d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); - break; - - default: -@@ -2391,50 +2565,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - } - --static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block); -+static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); - --static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_if *iff = hlsl_ir_if(instr); - const struct hlsl_ir_node *condition; - struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; - - condition = iff->condition.node; -- assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); -+ VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); - - sm1_ifc = (struct sm1_instruction) - { - .opcode = D3DSIO_IFC, - .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. 
*/ - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[0].reg = condition->reg.id, - .srcs[0].mod = 0, - -- .srcs[1].type = D3DSPR_TEMP, -+ .srcs[1].type = VKD3DSPR_TEMP, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), - .srcs[1].reg = condition->reg.id, - .srcs[1].mod = D3DSPSM_NEG, - - .src_count = 2, - }; -- write_sm1_instruction(ctx, buffer, &sm1_ifc); -- write_sm1_block(ctx, buffer, &iff->then_block); -+ d3dbc_write_instruction(d3dbc, &sm1_ifc); -+ d3dbc_write_block(d3dbc, &iff->then_block); - - if (!list_empty(&iff->else_block.instrs)) - { - sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; -- write_sm1_instruction(ctx, buffer, &sm1_else); -- write_sm1_block(ctx, buffer, &iff->else_block); -+ d3dbc_write_instruction(d3dbc, &sm1_else); -+ d3dbc_write_block(d3dbc, &iff->else_block); - } - - sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; -- write_sm1_instruction(ctx, buffer, &sm1_endif); -+ d3dbc_write_instruction(d3dbc, &sm1_endif); - } - --static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); - -@@ -2448,54 +2621,55 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - { - .opcode = D3DSIO_TEXKILL, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg->id, - .dst.writemask = reg->writemask, - .has_dst = 1, - }; - -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - break; - } - - default: -- hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); -+ hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); - } - } - 
--static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) -+static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), - .src_count = 1, - }; - -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - - if (load->src.var->is_uniform) - { -- assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_CONST; -+ VKD3D_ASSERT(reg.allocated); -+ sm1_instr.srcs[0].type = VKD3DSPR_CONST; - } - else if (load->src.var->is_input_semantic) - { -- if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, -- false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) -+ if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, -+ load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) - { -- assert(reg.allocated); -- sm1_instr.srcs[0].type = D3DSPR_INPUT; -+ VKD3D_ASSERT(reg.allocated); -+ sm1_instr.srcs[0].type = VKD3DSPR_INPUT; - sm1_instr.srcs[0].reg = reg.id; - } - else -@@ -2503,32 +2677,34 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const 
struct hlsl_ir_node *instr) -+static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - struct hlsl_ir_node *coords = load->coords.node; -+ struct hlsl_ir_node *ddx = load->ddx.node; -+ struct hlsl_ir_node *ddy = load->ddy.node; - unsigned int sampler_offset, reg_id; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - struct sm1_instruction sm1_instr; - - sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); -- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; -+ reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; - - sm1_instr = (struct sm1_instruction) - { -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = coords->reg.id, -- .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), -+ .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), - -- .srcs[1].type = D3DSPR_SAMPLER, -+ .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, - .srcs[1].reg = reg_id, - .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), - -@@ -2546,69 +2722,82 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ - sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; - break; - -+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS: -+ sm1_instr.opcode = D3DSIO_TEX; -+ sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; -+ break; -+ -+ case HLSL_RESOURCE_SAMPLE_GRAD: -+ sm1_instr.opcode = D3DSIO_TEXLDD; -+ -+ sm1_instr.srcs[2].type = VKD3DSPR_TEMP; -+ sm1_instr.srcs[2].reg = ddx->reg.id; -+ sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); -+ -+ sm1_instr.srcs[3].type = VKD3DSPR_TEMP; -+ sm1_instr.srcs[3].reg = 
ddy->reg.id; -+ sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); -+ -+ sm1_instr.src_count += 2; -+ break; -+ - default: - hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); - return; - } - -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { -+ const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; - const struct hlsl_ir_store *store = hlsl_ir_store(instr); -- const struct hlsl_ir_node *rhs = store->rhs.node; -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); -+ const struct hlsl_ir_node *rhs = store->rhs.node; - struct sm1_instruction sm1_instr = - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = reg.id, - .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = rhs->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), - .src_count = 1, - }; - -- if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) -- { -- hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks."); -- return; -- } -- - if (store->lhs.var->is_output_semantic) - { -- if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) -+ if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) - { -- sm1_instr.dst.type = D3DSPR_TEMP; -+ sm1_instr.dst.type = VKD3DSPR_TEMP; - sm1_instr.dst.reg = 0; - } -- else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, -- true, 
&sm1_instr.dst.type, &sm1_instr.dst.reg)) -+ else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, -+ store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) - { -- assert(reg.allocated); -- sm1_instr.dst.type = D3DSPR_OUTPUT; -+ VKD3D_ASSERT(reg.allocated); -+ sm1_instr.dst.type = VKD3DSPR_OUTPUT; - sm1_instr.dst.reg = reg.id; - } - else - sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; - } - else -- assert(reg.allocated); -+ VKD3D_ASSERT(reg.allocated); - - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, -- const struct hlsl_ir_node *instr) -+static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) - { - const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); - const struct hlsl_ir_node *val = swizzle->val.node; -@@ -2616,27 +2805,27 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer - { - .opcode = D3DSIO_MOV, - -- .dst.type = D3DSPR_TEMP, -+ .dst.type = VKD3DSPR_TEMP, - .dst.reg = instr->reg.id, - .dst.writemask = instr->reg.writemask, - .has_dst = 1, - -- .srcs[0].type = D3DSPR_TEMP, -+ .srcs[0].type = VKD3DSPR_TEMP, - .srcs[0].reg = val->reg.id, - .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), - swizzle->swizzle, instr->data_type->dimx), - .src_count = 1, - }; - -- assert(instr->reg.allocated); -- assert(val->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); -+ VKD3D_ASSERT(val->reg.allocated); - sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); -- write_sm1_instruction(ctx, buffer, &sm1_instr); -+ d3dbc_write_instruction(d3dbc, &sm1_instr); - } - --static void write_sm1_block(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, -- const struct hlsl_block *block) -+static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) - { -+ struct hlsl_ctx *ctx = d3dbc->ctx; - const struct hlsl_ir_node *instr; - - LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -@@ -2656,38 +2845,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - vkd3d_unreachable(); - - case HLSL_IR_CONSTANT: -- write_sm1_constant(ctx, buffer, instr); -+ d3dbc_write_constant(d3dbc, instr); - break; - - case HLSL_IR_EXPR: -- write_sm1_expr(ctx, buffer, instr); -+ d3dbc_write_expr(d3dbc, instr); - break; - - case HLSL_IR_IF: - if (hlsl_version_ge(ctx, 2, 1)) -- write_sm1_if(ctx, buffer, instr); -+ d3dbc_write_if(d3dbc, instr); - else - hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); - break; - - case HLSL_IR_JUMP: -- write_sm1_jump(ctx, buffer, instr); -+ d3dbc_write_jump(d3dbc, instr); - break; - - case HLSL_IR_LOAD: -- write_sm1_load(ctx, buffer, instr); -+ d3dbc_write_load(d3dbc, instr); - break; - - case HLSL_IR_RESOURCE_LOAD: -- write_sm1_resource_load(ctx, buffer, instr); -+ d3dbc_write_resource_load(d3dbc, instr); - break; - - case HLSL_IR_STORE: -- write_sm1_store(ctx, buffer, instr); -+ d3dbc_write_store(d3dbc, instr); - break; - - case HLSL_IR_SWIZZLE: -- write_sm1_swizzle(ctx, buffer, instr); -+ d3dbc_write_swizzle(d3dbc, instr); - break; - - default: -@@ -2696,32 +2885,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * - } - } - --int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) -+/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving -+ * data from the other parameters instead, so it can be removed as an argument -+ * and be declared in vkd3d_shader_private.h and used without relying on HLSL -+ * IR structs. 
*/ -+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -- struct vkd3d_bytecode_buffer buffer = {0}; -+ const struct vkd3d_shader_version *version = &program->shader_version; -+ struct d3dbc_compiler d3dbc = {0}; -+ struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; -+ -+ d3dbc.ctx = ctx; -+ d3dbc.program = program; -+ d3dbc.message_context = message_context; - -- put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); -+ put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - -- write_sm1_uniforms(ctx, &buffer, entry_func); -+ bytecode_put_bytes(buffer, ctab->code, ctab->size); - -- write_sm1_constant_defs(ctx, &buffer); -- write_sm1_semantic_dcls(ctx, &buffer); -- write_sm1_sampler_dcls(ctx, &buffer); -- write_sm1_block(ctx, &buffer, &entry_func->body); -+ d3dbc_write_constant_defs(&d3dbc); -+ d3dbc_write_semantic_dcls(&d3dbc); -+ d3dbc_write_sampler_dcls(&d3dbc); -+ d3dbc_write_block(&d3dbc, &entry_func->body); - -- put_u32(&buffer, D3DSIO_END); -+ put_u32(buffer, D3DSIO_END); - -- if (buffer.status) -- ctx->result = buffer.status; -+ if (buffer->status) -+ ctx->result = buffer->status; - - if (!ctx->result) - { -- out->code = buffer.data; -- out->size = buffer.size; -+ out->code = buffer->data; -+ out->size = buffer->size; - } - else - { -- vkd3d_free(buffer.data); -+ vkd3d_free(buffer->data); - } - return ctx->result; - } -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -index 4b9f67235aa..184788dc57e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c -@@ -29,7 +29,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t 
tag, const void - { - struct vkd3d_shader_dxbc_section_desc *section; - -- assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); -+ VKD3D_ASSERT(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); - - section = &dxbc->sections[dxbc->section_count++]; - section->tag = tag; -@@ -983,7 +983,7 @@ static int shader_parse_root_signature(const struct vkd3d_shader_code *data, - { - struct vkd3d_shader_root_signature_desc1 *v_1_1 = &desc->u.v_1_1; - -- assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); -+ VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); - - v_1_1->parameter_count = count; - if (v_1_1->parameter_count) -@@ -1777,7 +1777,7 @@ int vkd3d_shader_convert_root_signature(struct vkd3d_shader_versioned_root_signa - } - else - { -- assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); -+ VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); - ret = convert_root_signature_to_v1_1(dst, src); - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c -index 73a8d8687c5..4a17c62292b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/dxil.c -+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c -@@ -458,6 +458,8 @@ enum dx_intrinsic_opcode - DX_WAVE_ACTIVE_OP = 119, - DX_WAVE_ACTIVE_BIT = 120, - DX_WAVE_PREFIX_OP = 121, -+ DX_QUAD_READ_LANE_AT = 122, -+ DX_QUAD_OP = 123, - DX_LEGACY_F32TOF16 = 130, - DX_LEGACY_F16TOF32 = 131, - DX_WAVE_ALL_BIT_COUNT = 135, -@@ -576,6 +578,13 @@ enum dxil_wave_op_kind - WAVE_OP_MAX = 3, - }; - -+enum dxil_quad_op_kind -+{ -+ QUAD_READ_ACROSS_X = 0, -+ QUAD_READ_ACROSS_Y = 1, -+ QUAD_READ_ACROSS_D = 2, -+}; -+ - struct sm6_pointer_info - { - const struct sm6_type *type; -@@ -932,7 +941,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length - if (!length) - return 0; - -- assert(length < 32); -+ VKD3D_ASSERT(length < 32); - - if (sm6_parser_is_end(sm6)) - { -@@ -940,7 +949,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int 
length - return 0; - } - -- assert(sm6->bitpos < 32); -+ VKD3D_ASSERT(sm6->bitpos < 32); - bits = *sm6->ptr >> sm6->bitpos; - l = 32 - sm6->bitpos; - if (l <= length) -@@ -1199,7 +1208,7 @@ static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) - struct dxil_global_abbrev *global_abbrev; - enum vkd3d_result ret; - -- assert(block->id == BLOCKINFO_BLOCK); -+ VKD3D_ASSERT(block->id == BLOCKINFO_BLOCK); - - if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) - || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) -@@ -1468,7 +1477,7 @@ static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct - if (sm6->abbrevs[i]->block_id == block->id) - block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; - -- assert(abbrev_count == block->abbrev_count); -+ VKD3D_ASSERT(abbrev_count == block->abbrev_count); - } - - if ((ret = dxil_block_read(block, sm6)) < 0) -@@ -1546,7 +1555,7 @@ static char *dxil_record_to_string(const struct dxil_record *record, unsigned in - unsigned int i; - char *str; - -- assert(offset <= record->operand_count); -+ VKD3D_ASSERT(offset <= record->operand_count); - if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) - { - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, -@@ -1834,7 +1843,7 @@ static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) - ++sm6->type_count; - } - -- assert(sm6->type_count == type_count); -+ VKD3D_ASSERT(sm6->type_count == type_count); - - if (struct_name) - { -@@ -2207,13 +2216,13 @@ static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) - - static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) - { -- assert(sm6_value_is_function_dcl(fn)); -+ VKD3D_ASSERT(sm6_value_is_function_dcl(fn)); - return fn->u.function.is_prototype && !strncmp(fn->u.function.name, 
"dx.op.", 6); - } - - static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) - { -- assert(sm6->value_count < sm6->value_capacity); -+ VKD3D_ASSERT(sm6->value_count < sm6->value_capacity); - return &sm6->values[sm6->value_count]; - } - -@@ -3395,7 +3404,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa - enum vkd3d_shader_opcode handler_idx) - { - struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); -- assert(ins); -+ VKD3D_ASSERT(ins); - vsir_instruction_init(ins, &sm6->p.location, handler_idx); - ++sm6->p.program->instructions.count; - return ins; -@@ -3642,7 +3651,7 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init - { - const struct sm6_value *value; - -- assert(index); -+ VKD3D_ASSERT(index); - --index; - if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_icb(value) && !sm6_value_is_undef(value))) - { -@@ -3755,21 +3764,21 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) - for (i = 0; i < sm6->p.program->instructions.count; ++i) - { - ins = &sm6->p.program->instructions.elements[i]; -- if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) -+ if (ins->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) - { - ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( - (uintptr_t)ins->declaration.indexable_temp.initialiser, sm6); - } -- else if (ins->handler_idx == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) -+ else if (ins->opcode == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) - { - ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); - } -- else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) -+ else if (ins->opcode == VKD3DSIH_DCL_TGSM_RAW) - { - ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); - ins->flags = 0; - } -- else if 
(ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) -+ else if (ins->opcode == VKD3DSIH_DCL_TGSM_STRUCTURED) - { - ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); - ins->flags = 0; -@@ -3886,7 +3895,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, const struct shade - if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) - param->reg.idx[count++].offset = 0; - -- assert(count < ARRAY_SIZE(param->reg.idx)); -+ VKD3D_ASSERT(count < ARRAY_SIZE(param->reg.idx)); - param->reg.idx[count++].offset = i; - param->reg.idx_count = count; - } -@@ -4289,7 +4298,7 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco - if (!(flags & FP_ALLOW_UNSAFE_ALGEBRA)) - ins->flags |= VKD3DSI_PRECISE_X; - flags &= ~FP_ALLOW_UNSAFE_ALGEBRA; -- /* SPIR-V FPFastMathMode is only available in the Kernel executon model. */ -+ /* SPIR-V FPFastMathMode is only available in the Kernel execution model. 
*/ - silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); - break; - case VKD3DSIH_IADD: -@@ -4402,7 +4411,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record - code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, -@@ -4619,6 +4628,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co - return VKD3DSIH_IMAX; - case DX_IMIN: - return VKD3DSIH_IMIN; -+ case DX_QUAD_READ_LANE_AT: -+ return VKD3DSIH_QUAD_READ_LANE_AT; - case DX_UMAX: - return VKD3DSIH_UMAX; - case DX_UMIN: -@@ -4855,10 +4866,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr - return; - src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); - register_index_address_init(&src_param->reg.idx[2], operands[1], sm6); -- assert(src_param->reg.idx_count == 3); -+ VKD3D_ASSERT(src_param->reg.idx_count == 3); - - type = sm6_type_get_scalar_type(dst->type, 0); -- assert(type); -+ VKD3D_ASSERT(type); - src_param->reg.data_type = vkd3d_data_type_from_sm6_type(type); - if (data_type_is_64_bit(src_param->reg.data_type)) - src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); -@@ -4962,7 +4973,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int - reg->non_uniform = !!sm6_value_get_constant_uint(operands[3]); - - /* NOP is used to flag no instruction emitted. 
*/ -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -@@ -5200,7 +5211,7 @@ static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_in - instruction_dst_param_init_temp_vector(ins++, sm6); - state->temp_idx = 1; - -- /* DXIL does not have an instrinsic for sample info, and resinfo is expected to return -+ /* DXIL does not have an intrinsic for sample info, and resinfo is expected to return - * the sample count in .w for MS textures. The result is always a struct of 4 x uint32. */ - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_INFO); - ins->flags = VKD3DSI_SAMPLE_INFO_UINT; -@@ -5331,7 +5342,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin - - if (!is_patch_constant && !operands[3]->is_undefined) - { -- assert(src_param->reg.idx_count > count); -+ VKD3D_ASSERT(src_param->reg.idx_count > count); - register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); - } - -@@ -5370,6 +5381,47 @@ static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intr - sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); - } - -+static enum vkd3d_shader_opcode dx_map_quad_op(enum dxil_quad_op_kind op) -+{ -+ switch (op) -+ { -+ case QUAD_READ_ACROSS_X: -+ return VKD3DSIH_QUAD_READ_ACROSS_X; -+ case QUAD_READ_ACROSS_Y: -+ return VKD3DSIH_QUAD_READ_ACROSS_Y; -+ case QUAD_READ_ACROSS_D: -+ return VKD3DSIH_QUAD_READ_ACROSS_D; -+ default: -+ return VKD3DSIH_INVALID; -+ } -+} -+ -+static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, -+ const struct sm6_value **operands, struct function_emission_state *state) -+{ -+ struct vkd3d_shader_instruction *ins = state->ins; -+ struct vkd3d_shader_src_param *src_param; -+ enum vkd3d_shader_opcode opcode; -+ enum dxil_quad_op_kind quad_op; -+ -+ quad_op = 
sm6_value_get_constant_uint(operands[1]); -+ if ((opcode = dx_map_quad_op(quad_op)) == VKD3DSIH_INVALID) -+ { -+ FIXME("Unhandled quad op kind %u.\n", quad_op); -+ vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, -+ "Quad op kind %u is unhandled.", quad_op); -+ return; -+ } -+ -+ vsir_instruction_init(ins, &sm6->p.location, opcode); -+ -+ if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) -+ return; -+ src_param_init_from_value(src_param, operands[0]); -+ -+ instruction_dst_param_init_ssa_scalar(ins, sm6); -+} -+ - static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, - const struct sm6_value **operands, struct function_emission_state *state) - { -@@ -6229,6 +6281,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = - [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, - [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, - [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, -+ [DX_QUAD_OP ] = {"n", "Rc", sm6_parser_emit_dx_quad_op}, -+ [DX_QUAD_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, - [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, - [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, - [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, -@@ -6346,7 +6400,7 @@ static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_ - - info = &sm6_dx_op_table[op]; - -- assert(info->ret_type[0]); -+ VKD3D_ASSERT(info->ret_type[0]); - if (!sm6_parser_validate_operand_type(sm6, dst, info->ret_type[0], NULL, true)) - { - WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); -@@ -6381,7 +6435,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade - { - const struct sm6_type *type; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - - if (!dst->type) 
- return; -@@ -6551,7 +6605,7 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ - else if (to->u.width > from->u.width) - { - op = (code == CAST_ZEXT) ? VKD3DSIH_UTOU : VKD3DSIH_ITOI; -- assert(from->u.width == 1 || to->u.width == 64); -+ VKD3D_ASSERT(from->u.width == 1 || to->u.width == 64); - is_valid = from_int && to_int; - } - break; -@@ -6628,7 +6682,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor - { - *dst = *value; - dst->type = type; -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - return; - } - -@@ -6739,7 +6793,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor - * do not otherwise occur, so deleting these avoids the need for backend support. */ - if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) - { -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - *dst = *a; - return; - } -@@ -7039,7 +7093,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record - reg->idx_count = 2; - dst->structure_stride = src->structure_stride; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7087,7 +7141,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor - - if (ptr->structure_stride) - { -- assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); - - if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -@@ -7189,7 +7243,7 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record - incoming[j].block = sm6_function_get_block(function, record->operands[i + 1], sm6); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - - qsort(incoming, 
phi->incoming_count, sizeof(*incoming), phi_incoming_compare); - -@@ -7224,7 +7278,7 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record - - code_block->terminator.type = TERMINATOR_RET; - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7270,7 +7324,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco - - if (ptr->structure_stride) - { -- assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); -+ VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); - - if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) -@@ -7326,7 +7380,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec - if (!(src = sm6_parser_get_value_by_ref(sm6, record, type, &i)) - || !sm6_value_validate_is_register(src, sm6)) - return; -- assert(i == 2); -+ VKD3D_ASSERT(i == 2); - - if (src->type != type) - { -@@ -7384,7 +7438,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec - terminator->cases[i / 2u].value = sm6_value_get_constant_uint64(src); - } - -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - } - - static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_record *record, -@@ -7636,7 +7690,7 @@ static void metadata_attachment_record_apply(const struct dxil_record *record, e - "Ignoring a nested metadata attachment."); - } - -- assert(record->operand_count & 1); -+ VKD3D_ASSERT(record->operand_count & 1); - for (i = 1; i < record->operand_count; i += 2) - { - if (!(m = sm6_parser_find_metadata_kind(sm6, record->operands[i]))) -@@ -7843,7 +7897,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - } - - ins = &code_block->instructions[code_block->instruction_count]; -- ins->handler_idx = 
VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - - dst = sm6_parser_get_current_value(sm6); - fwd_type = dst->type; -@@ -7922,7 +7976,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - - if (sm6->p.failed) - return VKD3D_ERROR; -- assert(ins->handler_idx != VKD3DSIH_INVALID); - - if (record->attachment) - metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); -@@ -7933,9 +7986,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const - code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; - } - if (code_block) -- code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; -- else -- assert(ins->handler_idx == VKD3DSIH_NOP); -+ code_block->instruction_count += ins->opcode != VKD3DSIH_NOP; - - if (dst->type && fwd_type && dst->type != fwd_type) - { -@@ -8002,7 +8053,7 @@ static void sm6_block_emit_terminator(const struct sm6_block *block, struct sm6_ - switch_case = &block->terminator.cases[i]; - if (!(case_block = switch_case->block)) - { -- assert(sm6->p.failed); -+ VKD3D_ASSERT(sm6->p.failed); - continue; - } - if (switch_case->is_default) -@@ -8071,7 +8122,7 @@ static void sm6_block_emit_phi(const struct sm6_block *block, struct sm6_parser - if (incoming_block) - vsir_src_param_init_label(&src_params[index + 1], incoming_block->id); - else -- assert(sm6->p.failed); -+ VKD3D_ASSERT(sm6->p.failed); - } - - dst_param_init(dst_param); -@@ -8735,7 +8786,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - - if (!m) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; -+ ins->opcode = is_uav ? 
VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; - ins->declaration.raw_resource.resource.reg.write_mask = 0; - return &ins->declaration.raw_resource.resource; - } -@@ -8760,7 +8811,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - "A typed resource has no data type."); - } - -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; - for (i = 0; i < VKD3D_VEC4_SIZE; ++i) - ins->declaration.semantic.resource_data_type[i] = resource_values.data_type; - ins->declaration.semantic.resource_type = resource_type; -@@ -8770,14 +8821,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc - } - else if (kind == RESOURCE_KIND_RAWBUFFER) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; - ins->declaration.raw_resource.resource.reg.write_mask = 0; - - return &ins->declaration.raw_resource.resource; - } - else if (kind == RESOURCE_KIND_STRUCTUREDBUFFER) - { -- ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; -+ ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; - ins->declaration.structured_resource.byte_stride = resource_values.byte_stride; - ins->declaration.structured_resource.resource.reg.write_mask = 0; - -@@ -8858,7 +8909,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, - d->kind = kind; - d->reg_type = VKD3DSPR_RESOURCE; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; -- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL) -+ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) - ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - - init_resource_declaration(resource, VKD3DSPR_RESOURCE, d->reg_data_type, d->id, &d->range); -@@ -8932,7 +8983,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, - d->kind = values[0]; - d->reg_type = VKD3DSPR_UAV; - d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; -- d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL_UAV_TYPED) -+ d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) - ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; - - init_resource_declaration(resource, VKD3DSPR_UAV, d->reg_data_type, d->id, &d->range); -@@ -10155,12 +10206,13 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 - return NULL; - } - --static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, -+static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, -+ const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) - { - size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; -+ const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; - struct shader_signature *patch_constant_signature, *output_signature, *input_signature; -- const struct vkd3d_shader_location location = {.source_name = source_name}; - uint32_t version_token, dxil_version, token_count, magic; - const uint32_t *byte_code = dxbc_desc->byte_code; - unsigned int chunk_offset, chunk_size; -@@ -10251,9 +10303,9 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro - - /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - count = max(token_count, 400) - 400; -- if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) -+ if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); -+ vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); - sm6->ptr = &sm6->start[1]; - sm6->bitpos = 2; - -@@ -10489,7 +10541,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - uint32_t *byte_code = NULL; - int ret; - -- ERR("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ MESSAGE("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); - - dxbc_desc.is_dxil = true; - if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, -@@ -10514,7 +10566,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co - dxbc_desc.byte_code = byte_code; - } - -- ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); -+ ret = sm6_parser_init(&sm6, program, compile_info, message_context, &dxbc_desc); - free_dxbc_shader_desc(&dxbc_desc); - vkd3d_free(byte_code); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c -index 57b4ac24212..a1d1fd6572f 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/fx.c -+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c -@@ -56,6 +56,114 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) - vkd3d_free(string_entry); - } - -+struct function_component -+{ -+ const char *name; -+ bool lhs_has_index; -+ unsigned int lhs_index; -+}; -+ -+static const struct state_block_function_info -+{ -+ const char *name; -+ unsigned int min_args, max_args; -+ const struct function_component components[3]; -+ unsigned int min_profile; -+} 
-+function_info[] = -+{ -+ {"SetBlendState", 3, 3, { { "AB_BlendFactor" }, { "AB_SampleMask" }, { "BlendState" } }, 4 }, -+ {"SetDepthStencilState", 2, 2, { { "DS_StencilRef" }, { "DepthStencilState" } }, 4 }, -+ {"SetRasterizerState", 1, 1, { { "RasterizerState" } }, 4 }, -+ {"SetVertexShader", 1, 1, { { "VertexShader" } }, 4 }, -+ {"SetDomainShader", 1, 1, { { "DomainShader" } }, 5 }, -+ {"SetHullShader", 1, 1, { { "HullShader" } }, 5 }, -+ {"SetGeometryShader", 1, 1, { { "GeometryShader" } }, 4 }, -+ {"SetPixelShader", 1, 1, { { "PixelShader" } }, 4 }, -+ {"SetComputeShader", 1, 1, { { "ComputeShader" } }, 4 }, -+ {"OMSetRenderTargets", 2, 9, { {0} }, 4 }, -+}; -+ -+static const struct state_block_function_info *get_state_block_function_info(const char *name) -+{ -+ for (unsigned int i = 0; i < ARRAY_SIZE(function_info); ++i) -+ { -+ if (!strcmp(name, function_info[i].name)) -+ return &function_info[i]; -+ } -+ return NULL; -+} -+ -+static void add_function_component(struct function_component **components, const char *name, -+ bool lhs_has_index, unsigned int lhs_index) -+{ -+ struct function_component *comp = *components; -+ -+ comp->name = name; -+ comp->lhs_has_index = lhs_has_index; -+ comp->lhs_index = lhs_index; -+ -+ *components = *components + 1; -+} -+ -+static void get_state_block_function_components(const struct state_block_function_info *info, -+ struct function_component *components, unsigned int comp_count) -+{ -+ unsigned int i; -+ -+ VKD3D_ASSERT(comp_count <= info->max_args); -+ -+ if (info->min_args == info->max_args) -+ { -+ const struct function_component *c = info->components; -+ for (i = 0; i < comp_count; ++i, ++c) -+ add_function_component(&components, c->name, c->lhs_has_index, c->lhs_index); -+ return; -+ } -+ -+ if (!strcmp(info->name, "OMSetRenderTargets")) -+ { -+ for (i = 0; i < comp_count - 2; ++i) -+ add_function_component(&components, "RenderTargetView", true, i + 1); -+ add_function_component(&components, "DepthStencilView", 
false, 0); -+ add_function_component(&components, "RenderTargetView", true, 0); -+ } -+} -+ -+bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, -+ const struct vkd3d_shader_location *loc) -+{ -+ if (entry->is_function_call) -+ { -+ const struct state_block_function_info *info = get_state_block_function_info(entry->name); -+ -+ if (!info) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid state block function '%s'.", entry->name); -+ return false; -+ } -+ if (entry->args_count < info->min_args || entry->args_count > info->max_args) -+ { -+ if (info->min_args == info->max_args) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid argument count for state block function '%s' (expected %u).", -+ entry->name, info->min_args); -+ } -+ else -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "Invalid argument count for state block function '%s' (expected from %u to %u).", -+ entry->name, info->min_args, info->max_args); -+ } -+ return false; -+ } -+ } -+ -+ return true; -+} -+ - struct fx_write_context; - - struct fx_write_context_ops -@@ -63,6 +171,7 @@ struct fx_write_context_ops - uint32_t (*write_string)(const char *string, struct fx_write_context *fx); - void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); - void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); -+ void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); - bool are_child_effects_supported; - }; - -@@ -94,6 +203,10 @@ struct fx_write_context - uint32_t texture_count; - uint32_t uav_count; - uint32_t sampler_state_count; -+ uint32_t depth_stencil_state_count; -+ uint32_t rasterizer_state_count; -+ uint32_t blend_state_count; -+ uint32_t string_count; - int status; - - bool child_effect; -@@ -122,14 +235,46 @@ static uint32_t write_string(const char *string, struct fx_write_context 
*fx) - - static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { -- if (var->state_block_count) -- hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); -- - fx->ops->write_pass(var, fx); - } - -+static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) -+{ -+ struct hlsl_ctx *ctx = fx->ctx; -+ struct hlsl_ir_var *v; -+ uint32_t count = 0; -+ -+ if (!scope) -+ return 0; -+ -+ LIST_FOR_EACH_ENTRY(v, &scope->vars, struct hlsl_ir_var, scope_entry) -+ { -+ if (!v->default_values) -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Annotation variable is missing default value."); -+ -+ fx->ops->write_annotation(v, fx); -+ ++count; -+ } -+ -+ return count; -+} -+ -+static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count_offset, count; -+ -+ count_offset = put_u32(buffer, 0); -+ count = write_annotations(scope, fx); -+ set_u32(buffer, count_offset, count); -+} -+ - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); - static const char * get_fx_4_type_name(const struct hlsl_type *type); -+static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); -+static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, -+ uint32_t count_offset, struct fx_write_context *fx); - - static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -@@ -138,7 +283,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context - unsigned int elements_count; - const char *name; - -- assert(fx->ctx->profile->major_version >= 4); -+ VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); - - if (type->class == HLSL_CLASS_ARRAY) - { -@@ -274,15 +419,14 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f - static void 
write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t name_offset; -+ uint32_t name_offset, count_offset; - - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); -- put_u32(buffer, 0); /* Assignment count. */ -- put_u32(buffer, 0); /* Annotation count. */ -+ count_offset = put_u32(buffer, 0); - -- /* TODO: annotations */ -- /* TODO: assignments */ -+ write_fx_4_annotations(var->annotations, fx); -+ write_fx_4_state_block(var, 0, count_offset, fx); - } - - static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) -@@ -297,6 +441,12 @@ static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx - - /* TODO: annotations */ - /* TODO: assignments */ -+ -+ if (var->state_block_count && var->state_blocks[0]->count) -+ hlsl_fixme(fx->ctx, &var->loc, "Write pass assignments."); -+ -+ /* For some reason every pass adds to the total shader object count. 
*/ -+ fx->shader_count++; - } - - static uint32_t get_fx_4_type_size(const struct hlsl_type *type) -@@ -402,6 +552,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - return uav_type_names[type->sampler_dim]; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ return "DepthStencilState"; -+ - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - return "DepthStencilView"; - -@@ -414,6 +567,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - case HLSL_CLASS_PIXEL_SHADER: - return "PixelShader"; - -+ case HLSL_CLASS_STRING: -+ return "String"; -+ - default: - return type->name; - } -@@ -421,10 +577,20 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) - - static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) - { -+ struct field_offsets -+ { -+ uint32_t name; -+ uint32_t semantic; -+ uint32_t offset; -+ uint32_t type; -+ }; -+ uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -- uint32_t name_offset, offset, size, stride, numeric_desc; -+ struct field_offsets *field_offsets = NULL; -+ struct hlsl_ctx *ctx = fx->ctx; - uint32_t elements_count = 0; - const char *name; -+ size_t i; - - /* Resolve arrays to element type and number of elements. 
*/ - if (type->class == HLSL_CLASS_ARRAY) -@@ -436,6 +602,22 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - name = get_fx_4_type_name(type); - - name_offset = write_string(name, fx); -+ if (type->class == HLSL_CLASS_STRUCT) -+ { -+ if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) -+ return 0; -+ -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ const struct hlsl_struct_field *field = &type->e.record.fields[i]; -+ -+ field_offsets[i].name = write_string(field->name, fx); -+ field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); -+ field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; -+ field_offsets[i].type = write_type(field->type, fx); -+ } -+ } -+ - offset = put_u32_unaligned(buffer, name_offset); - - switch (type->class) -@@ -446,13 +628,21 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - put_u32_unaligned(buffer, 1); - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_STRING: - put_u32_unaligned(buffer, 2); - break; - -@@ -464,43 +654,50 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_NULL: - vkd3d_unreachable(); - -- case HLSL_CLASS_STRING: - case HLSL_CLASS_VOID: - FIXME("Writing type class %u is not implemented.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); - return 0; - } - 
-- size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ /* Structures can only contain numeric fields, this is validated during variable declaration. */ -+ total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); -+ packed_size = 0; -+ if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) -+ packed_size = hlsl_type_component_count(type) * sizeof(float); - if (elements_count) -- size *= elements_count; -+ { -+ total_size *= elements_count; -+ packed_size *= elements_count; -+ } - stride = align(stride, 4 * sizeof(float)); - - put_u32_unaligned(buffer, elements_count); -- put_u32_unaligned(buffer, size); /* Total size. */ -- put_u32_unaligned(buffer, stride); /* Stride. */ -- put_u32_unaligned(buffer, size); -+ put_u32_unaligned(buffer, total_size); -+ put_u32_unaligned(buffer, stride); -+ put_u32_unaligned(buffer, packed_size); - - if (type->class == HLSL_CLASS_STRUCT) - { -- size_t i; -- - put_u32_unaligned(buffer, type->e.record.field_count); - for (i = 0; i < type->e.record.field_count; ++i) - { -- const struct hlsl_struct_field *field = &type->e.record.fields[i]; -- uint32_t semantic_offset, field_type_offset; -+ const struct field_offsets *field = &field_offsets[i]; - -- name_offset = write_string(field->name, fx); -- semantic_offset = write_string(field->semantic.name, fx); -- field_type_offset = write_type(field->type, fx); -+ put_u32_unaligned(buffer, field->name); -+ put_u32_unaligned(buffer, field->semantic); -+ put_u32_unaligned(buffer, field->offset); -+ put_u32_unaligned(buffer, field->type); -+ } - -- put_u32_unaligned(buffer, name_offset); -- put_u32_unaligned(buffer, semantic_offset); -- put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); -- put_u32_unaligned(buffer, field_type_offset); -+ if (ctx->profile->major_version == 5) -+ { -+ put_u32_unaligned(buffer, 0); /* Base class type */ -+ put_u32_unaligned(buffer, 0); /* Interface count */ - } - } - else if (type->class == 
HLSL_CLASS_TEXTURE) -@@ -556,18 +753,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co - { - put_u32_unaligned(buffer, 6); - } -+ else if (type->class == HLSL_CLASS_RASTERIZER_STATE) -+ { -+ put_u32_unaligned(buffer, 4); -+ } -+ else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) -+ { -+ put_u32_unaligned(buffer, 3); -+ } -+ else if (type->class == HLSL_CLASS_BLEND_STATE) -+ { -+ put_u32_unaligned(buffer, 2); -+ } -+ else if (type->class == HLSL_CLASS_STRING) -+ { -+ put_u32_unaligned(buffer, 1); -+ } - else if (hlsl_is_numeric_type(type)) - { - numeric_desc = get_fx_4_numeric_type_description(type, fx); - put_u32_unaligned(buffer, numeric_desc); - } -+ else if (type->class == HLSL_CLASS_COMPUTE_SHADER) -+ { -+ put_u32_unaligned(buffer, 28); -+ } -+ else if (type->class == HLSL_CLASS_HULL_SHADER) -+ { -+ put_u32_unaligned(buffer, 29); -+ } -+ else if (type->class == HLSL_CLASS_DOMAIN_SHADER) -+ { -+ put_u32_unaligned(buffer, 30); -+ } - else - { - FIXME("Type %u is not supported.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); -- return 0; - } - -+ vkd3d_free(field_offsets); - return offset; - } - -@@ -581,8 +806,9 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex - name_offset = write_string(var->name, fx); - put_u32(buffer, name_offset); - count_offset = put_u32(buffer, 0); -- put_u32(buffer, 0); /* Annotation count. */ -+ write_fx_4_annotations(var->annotations, fx); - -+ count = 0; - LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) - { - write_pass(pass, fx); -@@ -617,7 +843,7 @@ static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) - - put_u32(buffer, name_offset); - count_offset = put_u32(buffer, 0); /* Technique count */ -- put_u32(buffer, 0); /* Annotation count */ -+ write_fx_4_annotations(var ? var->annotations : NULL, fx); - - count = fx->technique_count; - write_techniques(var ? 
var->scope : fx->ctx->globals, fx); -@@ -668,6 +894,13 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f - return offset; - } - -+static uint32_t get_fx_2_type_class(const struct hlsl_type *type) -+{ -+ if (type->class == HLSL_CLASS_MATRIX) -+ return D3DXPC_MATRIX_ROWS; -+ return hlsl_sm1_class(type); -+} -+ - static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, - struct fx_write_context *fx) - { -@@ -683,10 +916,10 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - } - - name_offset = write_string(name, fx); -- semantic_offset = write_string(semantic->name, fx); -+ semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0; - - offset = put_u32(buffer, hlsl_sm1_base_type(type)); -- put_u32(buffer, hlsl_sm1_class(type)); -+ put_u32(buffer, get_fx_2_type_class(type)); - put_u32(buffer, name_offset); - put_u32(buffer, semantic_offset); - put_u32(buffer, elements_count); -@@ -705,6 +938,10 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - case HLSL_CLASS_STRUCT: - put_u32(buffer, type->e.record.field_count); - break; -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_PIXEL_SHADER: -+ fx->shader_count += elements_count; -+ break; - default: - ; - } -@@ -716,7 +953,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - - /* Validated in check_invalid_object_fields(). 
*/ -- assert(hlsl_is_numeric_type(field->type)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); - write_fx_2_parameter(field->type, field->name, &field->semantic, fx); - } - } -@@ -794,6 +1031,9 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_STRUCT: - /* FIXME: write actual initial value */ -+ if (var->default_values) -+ hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); -+ - offset = put_u32(buffer, 0); - - for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) -@@ -850,15 +1090,24 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type - hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); - return false; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_UAV: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: - return false; - - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_NULL: - /* This cannot appear as an extern variable. 
*/ - break; - } -@@ -910,7 +1159,7 @@ static const struct fx_write_context_ops fx_2_ops = - - static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - { -- uint32_t offset, size, technique_count, parameter_count, object_count; -+ uint32_t offset, size, technique_count, shader_count, parameter_count, object_count; - struct vkd3d_bytecode_buffer buffer = { 0 }; - struct vkd3d_bytecode_buffer *structured; - struct fx_write_context fx; -@@ -927,7 +1176,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - parameter_count = put_u32(structured, 0); /* Parameter count */ - technique_count = put_u32(structured, 0); -- put_u32(structured, 0); /* Unknown */ -+ shader_count = put_u32(structured, 0); - object_count = put_u32(structured, 0); - - write_fx_2_parameters(&fx); -@@ -936,6 +1185,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - - write_techniques(ctx->globals, &fx); - set_u32(structured, technique_count, fx.technique_count); -+ set_u32(structured, shader_count, fx.shader_count); - - put_u32(structured, 0); /* String count */ - put_u32(structured, 0); /* Resource count */ -@@ -972,9 +1222,93 @@ static const struct fx_write_context_ops fx_4_ops = - .write_string = write_fx_4_string, - .write_technique = write_fx_4_technique, - .write_pass = write_fx_4_pass, -+ .write_annotation = write_fx_4_annotation, - .are_child_effects_supported = true, - }; - -+static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, -+ struct fx_write_context *fx) -+{ -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); -+ uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; -+ struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t offset = buffer->size; -+ unsigned int comp_count; -+ -+ if (!value) -+ return 0; -+ -+ comp_count = 
hlsl_type_component_count(type); -+ -+ for (i = 0; i < elements_count; ++i) -+ { -+ switch (type->class) -+ { -+ case HLSL_CLASS_SCALAR: -+ case HLSL_CLASS_VECTOR: -+ case HLSL_CLASS_MATRIX: -+ { -+ switch (type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ case HLSL_TYPE_INT: -+ case HLSL_TYPE_UINT: -+ case HLSL_TYPE_BOOL: -+ -+ for (j = 0; j < comp_count; ++j) -+ { -+ put_u32_unaligned(buffer, value->number.u); -+ value++; -+ } -+ break; -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", -+ type->e.numeric.type); -+ } -+ -+ break; -+ } -+ case HLSL_CLASS_STRUCT: -+ { -+ struct hlsl_struct_field *fields = type->e.record.fields; -+ -+ for (j = 0; j < type->e.record.field_count; ++j) -+ { -+ write_fx_4_default_value(fields[i].type, value, fx); -+ value += hlsl_type_component_count(fields[i].type); -+ } -+ break; -+ } -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); -+ } -+ } -+ -+ return offset; -+} -+ -+static void write_fx_4_string_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; -+ const struct hlsl_default_value *value = var->default_values; -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ struct hlsl_ctx *ctx = fx->ctx; -+ uint32_t offset; -+ -+ if (!value) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "String objects have to be initialized."); -+ return; -+ } -+ -+ for (i = 0; i < elements_count; ++i, ++value) -+ { -+ offset = write_fx_4_string(value->string, fx); -+ put_u32(buffer, offset); -+ } -+} -+ - static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) - { - struct vkd3d_bytecode_buffer *buffer = &fx->structured; -@@ -984,22 +1318,20 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - { - 
HAS_EXPLICIT_BIND_POINT = 0x4, - }; -- struct hlsl_ctx *ctx = fx->ctx; - -- /* Explicit bind point. */ -- if (var->reg_reservation.reg_type) -+ if (var->has_explicit_bind_point) - flags |= HAS_EXPLICIT_BIND_POINT; - - type_offset = write_type(var->data_type, fx); - name_offset = write_string(var->name, fx); -- semantic_offset = write_string(var->semantic.name, fx); -+ semantic_offset = write_string(var->semantic.raw_name, fx); - - put_u32(buffer, name_offset); - put_u32(buffer, type_offset); - - semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ -- put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ -- value_offset = put_u32(buffer, 0); /* Default value offset */ -+ put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. */ -+ value_offset = put_u32(buffer, 0); - put_u32(buffer, flags); /* Flags */ - - if (shared) -@@ -1008,17 +1340,43 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st - } - else - { -- /* FIXME: write default value */ -- set_u32(buffer, value_offset, 0); -+ uint32_t offset = write_fx_4_default_value(var->data_type, var->default_values, fx); -+ set_u32(buffer, value_offset, offset); - -- put_u32(buffer, 0); /* Annotations count */ -- if (has_annotations(var)) -- hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); -+ write_fx_4_annotations(var->annotations, fx); - - fx->numeric_variable_count++; - } - } - -+static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t name_offset, type_offset, offset; -+ struct hlsl_ctx *ctx = fx->ctx; -+ -+ name_offset = write_string(var->name, fx); -+ type_offset = write_type(var->data_type, fx); -+ -+ put_u32(buffer, name_offset); -+ put_u32(buffer, type_offset); -+ -+ if 
(hlsl_is_numeric_type(type)) -+ { -+ offset = write_fx_4_default_value(var->data_type, var->default_values, fx); -+ put_u32(buffer, offset); -+ } -+ else if (type->class == HLSL_CLASS_STRING) -+ { -+ write_fx_4_string_initializer(var, fx); -+ } -+ else -+ { -+ hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); -+ } -+} -+ - struct rhs_named_value - { - const char *name; -@@ -1086,11 +1444,8 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_ir_node *value = entry->args->node; - -- if (entry->lhs_has_index) -- hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); -- - put_u32(buffer, entry->name_id); -- put_u32(buffer, 0); /* TODO: destination index */ -+ put_u32(buffer, entry->lhs_index); - type_offset = put_u32(buffer, 0); - rhs_offset = put_u32(buffer, 0); - -@@ -1104,6 +1459,17 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - assignment_type = 1; - break; - } -+ case HLSL_IR_LOAD: -+ { -+ struct hlsl_ir_load *l = hlsl_ir_load(value); -+ -+ if (l->src.path_len) -+ hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented."); -+ -+ value_offset = write_fx_4_string(l->src.var->name, fx); -+ assignment_type = 2; -+ break; -+ } - default: - hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); - } -@@ -1112,14 +1478,28 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl - set_u32(buffer, rhs_offset, value_offset); - } - --static bool state_block_contains_state(const char *name, unsigned int start, struct hlsl_state_block *block) -+static bool state_block_contains_state(const struct hlsl_state_block_entry *entry, unsigned int start_index, -+ struct hlsl_state_block *block) - { - unsigned int i; - -- for (i = start; i < block->count; ++i) -+ for (i = start_index; i < block->count; ++i) - { -- if 
(!ascii_strcasecmp(block->entries[i]->name, name)) -- return true; -+ const struct hlsl_state_block_entry *cur = block->entries[i]; -+ -+ if (cur->is_function_call) -+ continue; -+ -+ if (ascii_strcasecmp(cur->name, entry->name)) -+ continue; -+ -+ if (cur->lhs_has_index != entry->lhs_has_index) -+ continue; -+ -+ if (cur->lhs_has_index && cur->lhs_index != entry->lhs_index) -+ continue; -+ -+ return true; - } - - return false; -@@ -1131,6 +1511,24 @@ struct replace_state_context - struct hlsl_ir_var *var; - }; - -+static bool lower_null_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -+{ -+ struct hlsl_ir_node *c; -+ -+ if (instr->type != HLSL_IR_CONSTANT) -+ return false; -+ if (instr->data_type->class != HLSL_CLASS_NULL) -+ return false; -+ -+ if (!(c = hlsl_new_uint_constant(ctx, 0, &instr->loc))) -+ return false; -+ -+ list_add_before(&instr->entry, &c->entry); -+ hlsl_replace_node(instr, c); -+ -+ return true; -+} -+ - static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - { - struct replace_state_context *replace_context = context; -@@ -1160,6 +1558,93 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no - return true; - } - -+enum state_property_component_type -+{ -+ FX_BOOL, -+ FX_FLOAT, -+ FX_UINT, -+ FX_UINT8, -+ FX_DEPTHSTENCIL, -+ FX_RASTERIZER, -+ FX_DOMAINSHADER, -+ FX_HULLSHADER, -+ FX_COMPUTESHADER, -+ FX_TEXTURE, -+ FX_DEPTHSTENCILVIEW, -+ FX_RENDERTARGETVIEW, -+ FX_BLEND, -+ FX_VERTEXSHADER, -+ FX_PIXELSHADER, -+}; -+ -+static inline bool is_object_fx_type(enum state_property_component_type type) -+{ -+ switch (type) -+ { -+ case FX_DEPTHSTENCIL: -+ case FX_RASTERIZER: -+ case FX_DOMAINSHADER: -+ case FX_HULLSHADER: -+ case FX_COMPUTESHADER: -+ case FX_TEXTURE: -+ case FX_RENDERTARGETVIEW: -+ case FX_DEPTHSTENCILVIEW: -+ case FX_BLEND: -+ case FX_VERTEXSHADER: -+ case FX_PIXELSHADER: -+ return true; -+ default: -+ return false; -+ } -+} 
-+ -+static inline enum hlsl_type_class hlsl_type_class_from_fx_type(enum state_property_component_type type) -+{ -+ switch (type) -+ { -+ case FX_DEPTHSTENCIL: -+ return HLSL_CLASS_DEPTH_STENCIL_STATE; -+ case FX_RASTERIZER: -+ return HLSL_CLASS_RASTERIZER_STATE; -+ case FX_DOMAINSHADER: -+ return HLSL_CLASS_DOMAIN_SHADER; -+ case FX_HULLSHADER: -+ return HLSL_CLASS_HULL_SHADER; -+ case FX_COMPUTESHADER: -+ return HLSL_CLASS_COMPUTE_SHADER; -+ case FX_TEXTURE: -+ return HLSL_CLASS_TEXTURE; -+ case FX_RENDERTARGETVIEW: -+ return HLSL_CLASS_RENDER_TARGET_VIEW; -+ case FX_DEPTHSTENCILVIEW: -+ return HLSL_CLASS_DEPTH_STENCIL_VIEW; -+ case FX_BLEND: -+ return HLSL_CLASS_BLEND_STATE; -+ case FX_VERTEXSHADER: -+ return HLSL_CLASS_VERTEX_SHADER; -+ case FX_PIXELSHADER: -+ return HLSL_CLASS_PIXEL_SHADER; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) -+{ -+ switch (type) -+ { -+ case FX_BOOL: -+ return HLSL_TYPE_BOOL; -+ case FX_FLOAT: -+ return HLSL_TYPE_FLOAT; -+ case FX_UINT: -+ case FX_UINT8: -+ return HLSL_TYPE_UINT; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ - static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, - struct fx_write_context *fx) - { -@@ -1209,44 +1694,233 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - { NULL } - }; - -+ static const struct rhs_named_value depth_write_mask_values[] = -+ { -+ { "ZERO", 0 }, -+ { "ALL", 1 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value comparison_values[] = -+ { -+ { "NEVER", 1 }, -+ { "LESS", 2 }, -+ { "EQUAL", 3 }, -+ { "LESS_EQUAL", 4 }, -+ { "GREATER", 5 }, -+ { "NOT_EQUAL", 6 }, -+ { "GREATER_EQUAL", 7 }, -+ { "ALWAYS", 8 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value stencil_op_values[] = -+ { -+ { "KEEP", 1 }, -+ { "ZERO", 2 }, -+ { "REPLACE", 3 }, -+ { "INCR_SAT", 4 }, -+ { "DECR_SAT", 5 
}, -+ { "INVERT", 6 }, -+ { "INCR", 7 }, -+ { "DECR", 8 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value fill_values[] = -+ { -+ { "WIREFRAME", 2 }, -+ { "SOLID", 3 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value cull_values[] = -+ { -+ { "NONE", 1 }, -+ { "FRONT", 2 }, -+ { "BACK", 3 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value blend_values[] = -+ { -+ { "ZERO", 1 }, -+ { "ONE", 2 }, -+ { "SRC_COLOR", 3 }, -+ { "INV_SRC_COLOR", 4 }, -+ { "SRC_ALPHA", 5 }, -+ { "INV_SRC_ALPHA", 6 }, -+ { "DEST_ALPHA", 7 }, -+ { "INV_DEST_ALPHA", 8 }, -+ { "DEST_COLOR", 9 }, -+ { "INV_DEST_COLOR", 10 }, -+ { "SRC_ALPHA_SAT", 11 }, -+ { "BLEND_FACTOR", 14 }, -+ { "INV_BLEND_FACTOR", 15 }, -+ { "SRC1_COLOR", 16 }, -+ { "INV_SRC1_COLOR", 17 }, -+ { "SRC1_ALPHA", 18 }, -+ { "INV_SRC1_ALPHA", 19 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value blendop_values[] = -+ { -+ { "ADD", 1 }, -+ { "SUBTRACT", 2 }, -+ { "REV_SUBTRACT", 3 }, -+ { "MIN", 4 }, -+ { "MAX", 5 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value bool_values[] = -+ { -+ { "FALSE", 0 }, -+ { "TRUE", 1 }, -+ { NULL } -+ }; -+ -+ static const struct rhs_named_value null_values[] = -+ { -+ { "NULL", 0 }, -+ { NULL } -+ }; -+ - static const struct state - { - const char *name; - enum hlsl_type_class container; -- enum hlsl_base_type type; -+ enum hlsl_type_class class; -+ enum state_property_component_type type; - unsigned int dimx; -+ unsigned int array_size; - uint32_t id; - const struct rhs_named_value *values; - } - states[] = - { -- { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, -- { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, -- { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, -- { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, -- { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, -- { "MaxAnisotropy", HLSL_CLASS_SAMPLER, 
HLSL_TYPE_UINT, 1, 50 }, -- { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, -- { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, -- { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, -- { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, -- /* TODO: "Texture" field */ -+ { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, -+ { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, -+ { "BlendState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, -+ { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, -+ { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, -+ -+ { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, -+ { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, -+ { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, -+ { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, -+ { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, -+ -+ { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, -+ { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, -+ { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, -+ { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, -+ { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, -+ { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, -+ { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, -+ { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, -+ { 
"MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, -+ { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, -+ -+ { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, -+ { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, -+ { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, -+ { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, -+ { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, -+ { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, -+ { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, -+ { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, -+ { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, -+ { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, -+ { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, -+ { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, -+ { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, -+ { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, -+ -+ { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, -+ { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, 
address_values }, -+ { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, -+ { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, -+ { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, -+ { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, -+ { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, -+ { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, -+ { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 53 }, -+ { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, -+ { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, -+ -+ { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, -+ { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, -+ { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, - }; -+ -+ static const struct state fx_4_blend_states[] = -+ { -+ { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, -+ { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -+ { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, -+ { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, -+ { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, -+ { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, -+ { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, -+ { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, -+ { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 
44 }, -+ }; -+ -+ static const struct state fx_5_blend_states[] = -+ { -+ { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, -+ { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, -+ { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 38, blend_values }, -+ { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 39, blend_values }, -+ { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 40, blendop_values }, -+ { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 41, blend_values }, -+ { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 42, blend_values }, -+ { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 43, blendop_values }, -+ { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, -+ }; -+ -+ struct state_table -+ { -+ const struct state *ptr; -+ unsigned int count; -+ } table; -+ - const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); - struct replace_state_context replace_context; -+ struct hlsl_type *state_type = NULL; - struct hlsl_ir_node *node, *cast; - const struct state *state = NULL; - struct hlsl_ctx *ctx = fx->ctx; -- struct hlsl_type *state_type; -+ enum hlsl_base_type base_type; - unsigned int i; -- bool progress; - -- for (i = 0; i < ARRAY_SIZE(states); ++i) -+ if (type->class == HLSL_CLASS_BLEND_STATE) - { -- if (type->class == states[i].container -- && !ascii_strcasecmp(entry->name, states[i].name)) -+ if (ctx->profile->major_version == 4) -+ { -+ table.ptr = fx_4_blend_states; -+ table.count = ARRAY_SIZE(fx_4_blend_states); -+ } -+ else - { -- state = &states[i]; -+ table.ptr = fx_5_blend_states; -+ table.count = ARRAY_SIZE(fx_5_blend_states); -+ } -+ } -+ else -+ { -+ table.ptr = states; -+ table.count = ARRAY_SIZE(states); -+ } -+ -+ for (i = 0; i < 
table.count; ++i) -+ { -+ if (type->class == table.ptr[i].container -+ && !ascii_strcasecmp(entry->name, table.ptr[i].name)) -+ { -+ state = &table.ptr[i]; - break; - } - } -@@ -1264,69 +1938,327 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl - return; - } - -+ if (entry->lhs_has_index && state->array_size == 1) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Can't use array-style access for non-array state %s.", -+ entry->name); -+ return; -+ } -+ -+ if (!entry->lhs_has_index && state->array_size > 1) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected array index for array state %s.", -+ entry->name); -+ return; -+ } -+ -+ if (entry->lhs_has_index && (state->array_size <= entry->lhs_index)) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid element index %u for the state %s[%u].", -+ entry->lhs_index, state->name, state->array_size); -+ return; -+ } -+ - entry->name_id = state->id; - - replace_context.values = state->values; - replace_context.var = var; - -- /* Turned named constants to actual constants. */ -+ /* Turn named constants to actual constants. */ -+ hlsl_transform_ir(ctx, lower_null_constant, entry->instrs, NULL); - hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); -+ hlsl_run_const_passes(ctx, entry->instrs); - -- if (state->dimx) -- state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); -- else -- state_type = hlsl_get_scalar_type(ctx, state->type); -+ /* Now cast and run folding again. 
*/ -+ -+ if (is_object_fx_type(state->type)) -+ { -+ node = entry->args->node; -+ -+ switch (node->type) -+ { -+ case HLSL_IR_LOAD: -+ { -+ struct hlsl_ir_load *load = hlsl_ir_load(node); -+ -+ if (load->src.path_len) -+ hlsl_fixme(ctx, &ctx->location, "Arrays are not supported for RHS."); -+ -+ if (load->src.var->data_type->class != hlsl_type_class_from_fx_type(state->type)) -+ { -+ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Type mismatch for the %s state value", -+ entry->name); -+ } -+ -+ break; -+ } -+ case HLSL_IR_CONSTANT: -+ { -+ struct hlsl_ir_constant *c = hlsl_ir_constant(node); -+ struct hlsl_type *data_type = c->node.data_type; -+ -+ if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT) -+ { -+ if (c->value.u[0].u != 0) -+ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Only 0 integer constants are allowed for object-typed fields."); -+ } -+ else -+ { -+ hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Unexpected constant used for object-typed field."); -+ } -+ -+ break; -+ } -+ default: -+ hlsl_fixme(ctx, &ctx->location, "Unhandled node type for object-typed field."); -+ } - -- /* Cast to expected property type. 
*/ -- node = entry->args->node; -- if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) - return; -- list_add_after(&node->entry, &cast->entry); -+ } - -- hlsl_src_remove(entry->args); -- hlsl_src_from_node(entry->args, cast); -+ base_type = hlsl_type_from_fx_type(state->type); -+ switch (state->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ state_type = hlsl_get_vector_type(ctx, base_type, state->dimx); -+ break; -+ case HLSL_CLASS_SCALAR: -+ state_type = hlsl_get_scalar_type(ctx, base_type); -+ break; -+ case HLSL_CLASS_TEXTURE: -+ hlsl_fixme(ctx, &ctx->location, "Object type fields are not supported."); -+ break; -+ default: -+ ; -+ } - -- do -+ if (state_type) - { -- progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); -- progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); -- } while (progress); -+ node = entry->args->node; -+ if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) -+ return; -+ list_add_after(&node->entry, &cast->entry); -+ -+ /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. 
*/ -+ if (state->type == FX_UINT8) -+ { -+ struct hlsl_ir_node *mask; -+ -+ if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) -+ return; -+ list_add_after(&cast->entry, &mask->entry); -+ -+ if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) -+ return; -+ list_add_after(&mask->entry, &cast->entry); -+ } -+ -+ hlsl_src_remove(entry->args); -+ hlsl_src_from_node(entry->args, cast); -+ -+ hlsl_run_const_passes(ctx, entry->instrs); -+ } - } - --static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+static bool decompose_fx_4_state_add_entries(struct hlsl_state_block *block, unsigned int entry_index, -+ unsigned int count) - { -- uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; -- struct vkd3d_bytecode_buffer *buffer = &fx->structured; -- uint32_t count_offset, count; -+ if (!vkd3d_array_reserve((void **)&block->entries, &block->capacity, block->count + count, sizeof(*block->entries))) -+ return false; - -- for (i = 0; i < elements_count; ++i) -+ if (entry_index != block->count - 1) - { -- struct hlsl_state_block *block; -+ memmove(&block->entries[entry_index + count + 1], &block->entries[entry_index + 1], -+ (block->count - entry_index - 1) * sizeof(*block->entries)); -+ } -+ block->count += count; - -- count_offset = put_u32(buffer, 0); -+ return true; -+} -+ -+static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, struct hlsl_state_block *block, -+ unsigned int entry_index, struct fx_write_context *fx) -+{ -+ struct hlsl_state_block_entry *entry = block->entries[entry_index]; -+ const struct state_block_function_info *info; -+ struct function_component components[9]; -+ struct hlsl_ctx *ctx = fx->ctx; -+ unsigned int i; -+ -+ if (!entry->is_function_call) -+ return 1; -+ -+ if (!(info = get_state_block_function_info(entry->name))) -+ return 1; -+ -+ if (info->min_profile > ctx->profile->major_version) -+ { -+ hlsl_error(ctx, &var->loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, -+ "State %s is not supported for this profile.", entry->name); -+ return 1; -+ } -+ -+ /* For single argument case simply replace the name. */ -+ if (info->min_args == info->max_args && info->min_args == 1) -+ { -+ vkd3d_free(entry->name); -+ entry->name = hlsl_strdup(ctx, info->components[0].name); -+ return 1; -+ } -+ -+ if (!decompose_fx_4_state_add_entries(block, entry_index, entry->args_count - 1)) -+ return 1; - -- count = 0; -- if (var->state_blocks) -+ get_state_block_function_components(info, components, entry->args_count); -+ -+ for (i = 0; i < entry->args_count; ++i) -+ { -+ const struct function_component *comp = &components[i]; -+ unsigned int arg_index = (i + 1) % entry->args_count; -+ block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, -+ comp->lhs_has_index, comp->lhs_index, arg_index); -+ } -+ hlsl_free_state_block_entry(entry); -+ -+ return entry->args_count; -+} -+ -+/* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState -+ object, and only when fx_5_0 profile is used. 
*/ -+static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, -+ unsigned int entry_index, struct fx_write_context *fx) -+{ -+ static const char *states[] = { "SrcBlend", "DestBlend", "BlendOp", "SrcBlendAlpha", "DestBlendAlpha", "BlendOpAlpha" }; -+ const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); -+ struct hlsl_state_block_entry *entry = block->entries[entry_index]; -+ static const unsigned int array_size = 8; -+ struct hlsl_ctx *ctx = fx->ctx; -+ bool found = false; -+ unsigned int i; -+ -+ if (type->class != HLSL_CLASS_BLEND_STATE) -+ return 1; -+ if (ctx->profile->major_version != 5) -+ return 1; -+ if (entry->lhs_has_index) -+ return 1; -+ -+ for (i = 0; i < ARRAY_SIZE(states); ++i) -+ { -+ if (!ascii_strcasecmp(entry->name, states[i])) - { -- block = var->state_blocks[i]; -+ found = true; -+ break; -+ } -+ } - -- for (j = 0; j < block->count; ++j) -- { -- struct hlsl_state_block_entry *entry = block->entries[j]; -+ if (!found) -+ return 1; -+ -+ if (!decompose_fx_4_state_add_entries(block, entry_index, array_size - 1)) -+ return 1; -+ -+ block->entries[entry_index]->lhs_has_index = true; -+ for (i = 1; i < array_size; ++i) -+ { -+ block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, -+ entry->name, true, i, 0); -+ } - -- /* Skip if property is reassigned later. This will use the last assignment. */ -- if (state_block_contains_state(entry->name, j + 1, block)) -- continue; -+ return array_size; -+} - -- /* Resolve special constant names and property names. 
*/ -- resolve_fx_4_state_block_values(var, entry, fx); -+static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct hlsl_state_block *block, -+ unsigned int entry_index, struct fx_write_context *fx) -+{ -+ struct hlsl_state_block_entry *entry = block->entries[entry_index]; - -- write_fx_4_state_assignment(var, entry, fx); -- ++count; -- } -+ if (entry->is_function_call) -+ return decompose_fx_4_state_function_call(var, block, entry_index, fx); -+ -+ return decompose_fx_4_state_block_expand_array(var, block, entry_index, fx); -+} -+ -+static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, -+ uint32_t count_offset, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ struct hlsl_state_block *block; -+ uint32_t i, count = 0; -+ -+ if (var->state_blocks) -+ { -+ block = var->state_blocks[block_index]; -+ -+ for (i = 0; i < block->count;) -+ { -+ i += decompose_fx_4_state_block(var, block, i, fx); - } - -- set_u32(buffer, count_offset, count); -+ for (i = 0; i < block->count; ++i) -+ { -+ struct hlsl_state_block_entry *entry = block->entries[i]; -+ -+ /* Skip if property is reassigned later. This will use the last assignment. */ -+ if (state_block_contains_state(entry, i + 1, block)) -+ continue; -+ -+ /* Resolve special constant names and property names. 
*/ -+ resolve_fx_4_state_block_values(var, entry, fx); -+ -+ write_fx_4_state_assignment(var, entry, fx); -+ ++count; -+ } -+ } -+ -+ set_u32(buffer, count_offset, count); -+} -+ -+static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t count_offset; -+ -+ for (i = 0; i < elements_count; ++i) -+ { -+ count_offset = put_u32(buffer, 0); -+ -+ write_fx_4_state_block(var, i, count_offset, fx); -+ } -+} -+ -+static void write_fx_4_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); -+ unsigned int i; -+ -+ /* FIXME: write shader blobs, once parser support works. */ -+ for (i = 0; i < elements_count; ++i) -+ put_u32(buffer, 0); -+} -+ -+static void write_fx_5_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) -+{ -+ struct vkd3d_bytecode_buffer *buffer = &fx->structured; -+ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); -+ unsigned int i; -+ -+ /* FIXME: write shader blobs, once parser support works. 
*/ -+ for (i = 0; i < elements_count; ++i) -+ { -+ put_u32(buffer, 0); /* Blob offset */ -+ put_u32(buffer, 0); /* SODecl[0] offset */ -+ put_u32(buffer, 0); /* SODecl[1] offset */ -+ put_u32(buffer, 0); /* SODecl[2] offset */ -+ put_u32(buffer, 0); /* SODecl[3] offset */ -+ put_u32(buffer, 0); /* SODecl count */ -+ put_u32(buffer, 0); /* Rasterizer stream */ -+ put_u32(buffer, 0); /* Interface bindings count */ -+ put_u32(buffer, 0); /* Interface initializer offset */ - } - } - -@@ -1336,7 +2268,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); - struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t semantic_offset, bind_point = ~0u; -- uint32_t name_offset, type_offset, i; -+ uint32_t name_offset, type_offset; - struct hlsl_ctx *ctx = fx->ctx; - - if (var->reg_reservation.reg_type) -@@ -1344,7 +2276,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - - type_offset = write_type(var->data_type, fx); - name_offset = write_string(var->name, fx); -- semantic_offset = write_string(var->semantic.name, fx); -+ semantic_offset = write_string(var->semantic.raw_name, fx); - - put_u32(buffer, name_offset); - put_u32(buffer, type_offset); -@@ -1373,9 +2305,14 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_VERTEX_SHADER: -- /* FIXME: write shader blobs, once parser support works. 
*/ -- for (i = 0; i < elements_count; ++i) -- put_u32(buffer, 0); -+ write_fx_4_shader_initializer(var, fx); -+ fx->shader_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ write_fx_5_shader_initializer(var, fx); - fx->shader_count += elements_count; - break; - -@@ -1383,19 +2320,37 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ - fx->dsv_count += elements_count; - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->depth_stencil_state_count += elements_count; -+ break; -+ - case HLSL_CLASS_SAMPLER: - write_fx_4_state_object_initializer(var, fx); - fx->sampler_state_count += elements_count; - break; - -+ case HLSL_CLASS_RASTERIZER_STATE: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->rasterizer_state_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_BLEND_STATE: -+ write_fx_4_state_object_initializer(var, fx); -+ fx->blend_state_count += elements_count; -+ break; -+ -+ case HLSL_CLASS_STRING: -+ write_fx_4_string_initializer(var, fx); -+ fx->string_count += elements_count; -+ break; -+ - default: -- hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", -- type->e.numeric.type); -+ hlsl_fixme(ctx, &ctx->location, "Writing initializer for object class %u is not implemented.", -+ type->class); - } - -- put_u32(buffer, 0); /* Annotations count */ -- if (has_annotations(var)) -- hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); -+ write_fx_4_annotations(var->annotations, fx); - - ++fx->object_variable_count; - } -@@ -1438,9 +2393,7 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx - } - else - { -- put_u32(buffer, 0); /* Annotations count */ -- if (b->annotations) -- hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); -+ 
write_fx_4_annotations(b->annotations, fx); - ++fx->buffer_count; - } - -@@ -1464,6 +2417,9 @@ static void write_buffers(struct fx_write_context *fx, bool shared) - { - struct hlsl_buffer *buffer; - -+ if (shared && !fx->child_effect) -+ return; -+ - LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) - { - if (!buffer->size && !fx->include_empty_buffers) -@@ -1483,11 +2439,22 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc - - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_STRING: -+ return true; -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ if (ctx->profile->major_version < 5) -+ return false; - return true; - case HLSL_CLASS_UAV: - if (ctx->profile->major_version < 5) -@@ -1495,8 +2462,6 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc - if (type->e.resource.rasteriser_ordered) - return false; - return true; -- case HLSL_CLASS_VERTEX_SHADER: -- return true; - - default: - return false; -@@ -1549,11 +2514,11 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, fx.shared_object_count); - put_u32(&buffer, fx.technique_count); - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ -- put_u32(&buffer, 0); /* String count. */ -+ put_u32(&buffer, fx.string_count); - put_u32(&buffer, fx.texture_count); -- put_u32(&buffer, 0); /* Depth stencil state count. */ -- put_u32(&buffer, 0); /* Blend state count. */ -- put_u32(&buffer, 0); /* Rasterizer state count. 
*/ -+ put_u32(&buffer, fx.depth_stencil_state_count); -+ put_u32(&buffer, fx.blend_state_count); -+ put_u32(&buffer, fx.rasterizer_state_count); - put_u32(&buffer, fx.sampler_state_count); - put_u32(&buffer, fx.rtv_count); - put_u32(&buffer, fx.dsv_count); -@@ -1607,11 +2572,11 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) - put_u32(&buffer, fx.shared_object_count); - put_u32(&buffer, fx.technique_count); - size_offset = put_u32(&buffer, 0); /* Unstructured size. */ -- put_u32(&buffer, 0); /* String count. */ -+ put_u32(&buffer, fx.string_count); - put_u32(&buffer, fx.texture_count); -- put_u32(&buffer, 0); /* Depth stencil state count. */ -- put_u32(&buffer, 0); /* Blend state count. */ -- put_u32(&buffer, 0); /* Rasterizer state count. */ -+ put_u32(&buffer, fx.depth_stencil_state_count); -+ put_u32(&buffer, fx.blend_state_count); -+ put_u32(&buffer, fx.rasterizer_state_count); - put_u32(&buffer, fx.sampler_state_count); - put_u32(&buffer, fx.rtv_count); - put_u32(&buffer, fx.dsv_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c -index 3e482a5fc70..d1f02ab568b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/glsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c -@@ -18,10 +18,23 @@ - - #include "vkd3d_shader_private.h" - -+struct glsl_src -+{ -+ struct vkd3d_string_buffer *str; -+}; -+ -+struct glsl_dst -+{ -+ const struct vkd3d_shader_dst_param *vsir; -+ struct vkd3d_string_buffer *register_name; -+ struct vkd3d_string_buffer *mask; -+}; -+ - struct vkd3d_glsl_generator - { - struct vsir_program *program; -- struct vkd3d_string_buffer buffer; -+ struct vkd3d_string_buffer_cache string_buffers; -+ struct vkd3d_string_buffer *buffer; - struct vkd3d_shader_location location; - struct vkd3d_shader_message_context *message_context; - unsigned int indent; -@@ -45,18 +58,149 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne - vkd3d_string_buffer_printf(buffer, 
"%*s", 4 * indent, ""); - } - -+static void shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, -+ struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) -+{ -+ switch (reg->type) -+ { -+ case VKD3DSPR_TEMP: -+ vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); -+ break; -+ -+ default: -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled register type %#x.", reg->type); -+ vkd3d_string_buffer_printf(buffer, "", reg->type); -+ break; -+ } -+} -+ -+static void shader_glsl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t swizzle, uint32_t mask) -+{ -+ const char swizzle_chars[] = "xyzw"; -+ unsigned int i; -+ -+ vkd3d_string_buffer_printf(buffer, "."); -+ for (i = 0; i < VKD3D_VEC4_SIZE; ++i) -+ { -+ if (mask & (VKD3DSP_WRITEMASK_0 << i)) -+ vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]); -+ } -+} -+ -+static void shader_glsl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask) -+{ -+ vkd3d_string_buffer_printf(buffer, "."); -+ if (write_mask & VKD3DSP_WRITEMASK_0) -+ vkd3d_string_buffer_printf(buffer, "x"); -+ if (write_mask & VKD3DSP_WRITEMASK_1) -+ vkd3d_string_buffer_printf(buffer, "y"); -+ if (write_mask & VKD3DSP_WRITEMASK_2) -+ vkd3d_string_buffer_printf(buffer, "z"); -+ if (write_mask & VKD3DSP_WRITEMASK_3) -+ vkd3d_string_buffer_printf(buffer, "w"); -+} -+ -+static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_cache *cache) -+{ -+ vkd3d_string_buffer_release(cache, src->str); -+} -+ -+static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) -+{ -+ const struct vkd3d_shader_register *reg = &vsir_src->reg; -+ -+ glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ if (reg->non_uniform) -+ vkd3d_glsl_compiler_error(gen, 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled 'non-uniform' modifier."); -+ if (vsir_src->modifiers) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); -+ -+ shader_glsl_print_register_name(glsl_src->str, gen, reg); -+ if (reg->dimension == VSIR_DIMENSION_VEC4) -+ shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); -+} -+ -+static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) -+{ -+ vkd3d_string_buffer_release(cache, dst->mask); -+ vkd3d_string_buffer_release(cache, dst->register_name); -+} -+ -+static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst) -+{ -+ uint32_t write_mask = vsir_dst->write_mask; -+ -+ if (ins->flags & VKD3DSI_PRECISE_XYZW) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled 'precise' modifier."); -+ if (vsir_dst->reg.non_uniform) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled 'non-uniform' modifier."); -+ -+ glsl_dst->vsir = vsir_dst; -+ glsl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); -+ glsl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); -+ -+ shader_glsl_print_register_name(glsl_dst->register_name, gen, &vsir_dst->reg); -+ shader_glsl_print_write_mask(glsl_dst->mask, write_mask); -+ -+ return write_mask; -+} -+ -+static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( -+ struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) 
-+{ -+ va_list args; -+ -+ if (dst->vsir->shift) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); -+ if (dst->vsir->modifiers) -+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -+ "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); -+ -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); -+ -+ va_start(args, format); -+ vkd3d_string_buffer_vprintf(gen->buffer, format, args); -+ va_end(args); -+ -+ vkd3d_string_buffer_printf(gen->buffer, ";\n"); -+} -+ - static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - { -- shader_glsl_print_indent(&gen->buffer, gen->indent); -- vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "/* */\n", ins->opcode); - vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, -- "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); -+ "Internal compiler error: Unhandled instruction %#x.", ins->opcode); - } - --static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, -- const struct vkd3d_shader_instruction *ins) -+static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) -+{ -+ struct glsl_src src; -+ struct glsl_dst dst; -+ uint32_t mask; -+ -+ mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); -+ glsl_src_init(&src, gen, &ins->src[0], mask); -+ -+ shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); -+ -+ glsl_src_cleanup(&src, &gen->string_buffers); -+ glsl_dst_cleanup(&dst, &gen->string_buffers); -+} -+ -+static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) - 
{ -- const struct vkd3d_shader_version *version = &generator->program->shader_version; -+ const struct vkd3d_shader_version *version = &gen->program->shader_version; - - /* - * TODO: Implement in_subroutine -@@ -64,45 +208,59 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - */ - if (version->major >= 4) - { -- shader_glsl_print_indent(&generator->buffer, generator->indent); -- vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); -+ shader_glsl_print_indent(gen->buffer, gen->indent); -+ vkd3d_string_buffer_printf(gen->buffer, "return;\n"); - } - } - --static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, -- const struct vkd3d_shader_instruction *instruction) -+static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, -+ const struct vkd3d_shader_instruction *ins) - { -- generator->location = instruction->location; -+ gen->location = ins->location; - -- switch (instruction->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT: - case VKD3DSIH_DCL_OUTPUT: - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_NOP: - break; -+ case VKD3DSIH_MOV: -+ shader_glsl_mov(gen, ins); -+ break; - case VKD3DSIH_RET: -- shader_glsl_ret(generator, instruction); -+ shader_glsl_ret(gen, ins); - break; - default: -- shader_glsl_unhandled(generator, instruction); -+ shader_glsl_unhandled(gen, ins); - break; - } - } - -+static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) -+{ -+ const struct vsir_program *program = gen->program; -+ struct vkd3d_string_buffer *buffer = gen->buffer; -+ -+ if (program->temp_count) -+ vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); -+} -+ - static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) - { - const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; -- struct vkd3d_string_buffer *buffer = &gen->buffer; -+ struct vkd3d_string_buffer 
*buffer = gen->buffer; - unsigned int i; - void *code; - -- ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); -+ MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); - - vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); - - vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); - -+ shader_glsl_generate_declarations(gen); -+ - vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); - - ++gen->indent; -@@ -132,7 +290,8 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc - - static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) - { -- vkd3d_string_buffer_cleanup(&gen->buffer); -+ vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); -+ vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); - } - - static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, -@@ -140,7 +299,8 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, - { - memset(gen, 0, sizeof(*gen)); - gen->program = program; -- vkd3d_string_buffer_init(&gen->buffer); -+ vkd3d_string_buffer_cache_init(&gen->string_buffers); -+ gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); - gen->message_context = message_context; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -index 99214fba6de..bd5baacd83d 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c -@@ -134,7 +134,7 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) - return hlsl_get_var(scope->upper, name); - } - --static void free_state_block_entry(struct hlsl_state_block_entry *entry) -+void hlsl_free_state_block_entry(struct hlsl_state_block_entry *entry) - { - unsigned int i; - -@@ -151,9 +151,9 @@ void hlsl_free_state_block(struct hlsl_state_block *state_block) - { - 
unsigned int k; - -- assert(state_block); -+ VKD3D_ASSERT(state_block); - for (k = 0; k < state_block->count; ++k) -- free_state_block_entry(state_block->entries[k]); -+ hlsl_free_state_block_entry(state_block->entries[k]); - vkd3d_free(state_block->entries); - vkd3d_free(state_block); - } -@@ -167,6 +167,15 @@ void hlsl_free_var(struct hlsl_ir_var *decl) - for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) - vkd3d_free((void *)decl->objects_usage[k]); - -+ if (decl->default_values) -+ { -+ unsigned int component_count = hlsl_type_component_count(decl->data_type); -+ -+ for (k = 0; k < component_count; ++k) -+ vkd3d_free((void *)decl->default_values[k].string); -+ vkd3d_free(decl->default_values); -+ } -+ - for (i = 0; i < decl->state_block_count; ++i) - hlsl_free_state_block(decl->state_blocks[i]); - vkd3d_free(decl->state_blocks); -@@ -367,15 +376,24 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type - type->reg_size[HLSL_REGSET_UAVS] = 1; - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - break; - } - } -@@ -435,21 +453,30 @@ static bool type_is_single_component(const struct hlsl_type *type) - { - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case 
HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - return true; - - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_CONSTANT_BUFFER: - return false; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -474,13 +501,13 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - struct hlsl_type *type = *type_ptr; - unsigned int index = *index_ptr; - -- assert(!type_is_single_component(type)); -- assert(index < hlsl_type_component_count(type)); -+ VKD3D_ASSERT(!type_is_single_component(type)); -+ VKD3D_ASSERT(index < hlsl_type_component_count(type)); - - switch (type->class) - { - case HLSL_CLASS_VECTOR: -- assert(index < type->dimx); -+ VKD3D_ASSERT(index < type->dimx); - *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); - *index_ptr = 0; - return index; -@@ -490,7 +517,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - unsigned int y = index / type->dimx, x = index % type->dimx; - bool row_major = hlsl_type_is_row_major(type); - -- assert(index < type->dimx * type->dimy); -+ VKD3D_ASSERT(index < type->dimx * type->dimy); - *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); - *index_ptr = row_major ? x : y; - return row_major ? 
y : x; -@@ -504,7 +531,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - *type_ptr = type->e.array.type; - *index_ptr = index % elem_comp_count; - array_index = index / elem_comp_count; -- assert(array_index < type->e.array.elements_count); -+ VKD3D_ASSERT(array_index < type->e.array.elements_count); - return array_index; - } - -@@ -528,6 +555,12 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, - vkd3d_unreachable(); - } - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ { -+ *type_ptr = type->e.resource.format; -+ return traverse_path_from_component_index(ctx, type_ptr, index_ptr); -+ } -+ - default: - vkd3d_unreachable(); - } -@@ -556,12 +589,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - - switch (type->class) - { -- case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: -- case HLSL_CLASS_MATRIX: - offset[HLSL_REGSET_NUMERIC] += idx; - break; - -+ case HLSL_CLASS_MATRIX: -+ offset[HLSL_REGSET_NUMERIC] += 4 * idx; -+ break; -+ - case HLSL_CLASS_STRUCT: - for (r = 0; r <= HLSL_REGSET_LAST; ++r) - offset[r] += type->e.record.fields[idx].reg_offset[r]; -@@ -577,21 +612,31 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty - } - break; - -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -- assert(idx == 0); -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ VKD3D_ASSERT(idx == 0); - break; - - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_SCALAR: -+ case 
HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_NULL: - vkd3d_unreachable(); - } - type = next_type; -@@ -638,9 +683,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - deref->rel_offset.node = NULL; - deref->const_offset = 0; - -- assert(chain); -+ VKD3D_ASSERT(chain); - if (chain->type == HLSL_IR_INDEX) -- assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); -+ VKD3D_ASSERT(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); - - /* Find the length of the index chain */ - chain_len = 0; -@@ -687,7 +732,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - chain_len++; - ptr = index->val.node; - } -- assert(deref->path_len == load->src.path_len + chain_len); -+ VKD3D_ASSERT(deref->path_len == load->src.path_len + chain_len); - - return true; - } -@@ -697,7 +742,7 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de - struct hlsl_type *type; - unsigned int i; - -- assert(deref); -+ VKD3D_ASSERT(deref); - - if (hlsl_deref_is_lowered(deref)) - return deref->data_type; -@@ -752,7 +797,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl - hlsl_src_from_node(&deref->path[deref_path_len++], c); - } - -- assert(deref_path_len == deref->path_len); -+ VKD3D_ASSERT(deref_path_len == deref->path_len); - - return true; - } -@@ -760,7 +805,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl - struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, const struct hlsl_type *type, - struct hlsl_ir_node *idx) - { -- assert(idx); -+ VKD3D_ASSERT(idx); - - switch (type->class) - { -@@ -780,7 +825,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co - { - struct hlsl_ir_constant *c = hlsl_ir_constant(idx); - -- assert(c->value.u[0].u < type->e.record.field_count); -+ VKD3D_ASSERT(c->value.u[0].u < type->e.record.field_count); - return 
type->e.record.fields[c->value.u[0].u].type; - } - -@@ -865,6 +910,20 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim - return type; - } - -+struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = hlsl_alloc(ctx, sizeof(*type)))) -+ return NULL; -+ type->class = HLSL_CLASS_CONSTANT_BUFFER; -+ type->dimy = 1; -+ type->e.resource.format = format; -+ hlsl_type_calculate_reg_size(ctx, type); -+ list_add_tail(&ctx->types, &type->entry); -+ return type; -+} -+ - static const char * get_case_insensitive_typename(const char *name) - { - static const char *const names[] = -@@ -876,6 +935,7 @@ static const char * get_case_insensitive_typename(const char *name) - "texture", - "vector", - "vertexshader", -+ "string", - }; - unsigned int i; - -@@ -956,14 +1016,25 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) - case HLSL_CLASS_ARRAY: - return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ return hlsl_type_component_count(type->e.resource.format); -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - return 1; - - case HLSL_CLASS_EFFECT_GROUP: -@@ -1038,14 +1109,25 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 - case HLSL_CLASS_TECHNIQUE: - return t1->e.version == t2->e.version; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ return hlsl_types_are_equal(t1->e.resource.format, 
t2->e.resource.format); -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - return true; - } - -@@ -1247,6 +1329,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const cha - list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); - else - list_add_tail(&ctx->globals->vars, &var->scope_entry); -+ var->is_synthetic = true; - } - return var; - } -@@ -1265,7 +1348,7 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc - if (!other) - return true; - -- assert(!hlsl_deref_is_lowered(other)); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(other)); - - if (!init_deref(ctx, deref, other->var, other->path_len)) - return false; -@@ -1322,8 +1405,8 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls - struct hlsl_ir_store *store; - unsigned int i; - -- assert(lhs); -- assert(!hlsl_deref_is_lowered(lhs)); -+ VKD3D_ASSERT(lhs); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); - - if (!(store = hlsl_alloc(ctx, sizeof(*store)))) - return NULL; -@@ -1394,7 +1477,7 @@ struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *t - { - struct hlsl_ir_constant *c; - -- assert(type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR || type->class == HLSL_CLASS_NULL); - - if (!(c = hlsl_alloc(ctx, sizeof(*c)))) - return NULL; -@@ -1439,6 +1522,30 @@ struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n - return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, 
HLSL_TYPE_UINT), &value, loc); - } - -+struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_string_constant *s; -+ -+ if (!(s = hlsl_alloc(ctx, sizeof(*s)))) -+ return NULL; -+ -+ init_node(&s->node, HLSL_IR_STRING_CONSTANT, ctx->builtin_types.string, loc); -+ -+ if (!(s->string = hlsl_strdup(ctx, str))) -+ { -+ hlsl_free_instr(&s->node); -+ return NULL; -+ } -+ return &s->node; -+} -+ -+struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_constant_value value = { 0 }; -+ return hlsl_new_constant(ctx, ctx->builtin_types.null, &value, loc); -+} -+ - struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], - struct hlsl_type *data_type, const struct vkd3d_shader_location *loc) -@@ -1468,7 +1575,7 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; - -- assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -@@ -1477,8 +1584,8 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; - -- assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -- assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); - return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); - } - -@@ -1540,7 +1647,7 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl - struct hlsl_type *type; - 
unsigned int i; - -- assert(!hlsl_deref_is_lowered(deref)); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); - - type = hlsl_deref_get_type(ctx, deref); - if (idx) -@@ -1569,7 +1676,7 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls - /* This deref can only exists temporarily because it is not the real owner of its members. */ - struct hlsl_deref tmp_deref; - -- assert(deref->path_len >= 1); -+ VKD3D_ASSERT(deref->path_len >= 1); - - tmp_deref = *deref; - tmp_deref.path_len = deref->path_len - 1; -@@ -1674,7 +1781,7 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - - if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) - return NULL; -- assert(hlsl_is_numeric_type(val->data_type)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); - if (components == 1) - type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); - else -@@ -1765,7 +1872,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type - } - - struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, const struct vkd3d_shader_location *loc) -+ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, -+ unsigned int unroll_limit, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_loop *loop; - -@@ -1774,6 +1882,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); - hlsl_block_init(&loop->body); - hlsl_block_add_block(&loop->body, block); -+ -+ loop->unroll_type = unroll_type; -+ loop->unroll_limit = unroll_limit; - return &loop->node; - } - -@@ -1836,9 +1947,7 @@ static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct - return map->instrs[i].dst; - } - -- /* The block passed to hlsl_clone_block() should have been free of external -- * references. 
*/ -- vkd3d_unreachable(); -+ return src; - } - - static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, -@@ -1846,7 +1955,7 @@ static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, - { - unsigned int i; - -- assert(!hlsl_deref_is_lowered(src)); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(src)); - - if (!init_deref(ctx, dst, src->var, src->path_len)) - return false; -@@ -1935,7 +2044,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ - if (!clone_block(ctx, &body, &src->body, map)) - return NULL; - -- if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) -+ if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) - { - hlsl_block_cleanup(&body); - return NULL; -@@ -1992,6 +2101,11 @@ static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx, - return &dst->node; - } - -+static struct hlsl_ir_node *clone_string_constant(struct hlsl_ctx *ctx, struct hlsl_ir_string_constant *src) -+{ -+ return hlsl_new_string_constant(ctx, src->string, &src->node.loc); -+} -+ - static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_store *src) - { - struct hlsl_ir_store *dst; -@@ -2034,6 +2148,43 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, - return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); - } - -+struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, -+ struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -+ unsigned int lhs_index, unsigned int arg_index) -+{ -+ struct hlsl_state_block_entry *entry; -+ struct clone_instr_map map = { 0 }; -+ -+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) -+ return NULL; -+ entry->name = hlsl_strdup(ctx, name); -+ entry->lhs_has_index = lhs_has_index; -+ entry->lhs_index = lhs_index; -+ if (!(entry->instrs = hlsl_alloc(ctx, sizeof(*entry->instrs)))) -+ { -+ 
hlsl_free_state_block_entry(entry); -+ return NULL; -+ } -+ -+ entry->args_count = 1; -+ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) -+ { -+ hlsl_free_state_block_entry(entry); -+ return NULL; -+ } -+ -+ hlsl_block_init(entry->instrs); -+ if (!clone_block(ctx, entry->instrs, src->instrs, &map)) -+ { -+ hlsl_free_state_block_entry(entry); -+ return NULL; -+ } -+ clone_src(&map, entry->args, &src->args[arg_index]); -+ vkd3d_free(map.instrs); -+ -+ return entry; -+} -+ - void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) - { - hlsl_block_cleanup(&c->body); -@@ -2121,6 +2272,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, - case HLSL_IR_RESOURCE_STORE: - return clone_resource_store(ctx, map, hlsl_ir_resource_store(instr)); - -+ case HLSL_IR_STRING_CONSTANT: -+ return clone_string_constant(ctx, hlsl_ir_string_constant(instr)); -+ - case HLSL_IR_STORE: - return clone_store(ctx, map, hlsl_ir_store(instr)); - -@@ -2249,7 +2403,7 @@ void hlsl_pop_scope(struct hlsl_ctx *ctx) - { - struct hlsl_scope *prev_scope = ctx->cur_scope->upper; - -- assert(prev_scope); -+ VKD3D_ASSERT(prev_scope); - TRACE("Popping current scope.\n"); - ctx->cur_scope = prev_scope; - } -@@ -2327,17 +2481,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - switch (type->class) - { - case HLSL_CLASS_SCALAR: -- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); - return string; - - case HLSL_CLASS_VECTOR: -- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); - return string; - - case HLSL_CLASS_MATRIX: -- assert(type->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(type->e.numeric.type < 
ARRAY_SIZE(base_types)); - vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); - return string; - -@@ -2375,15 +2529,15 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - return string; - } - -- assert(hlsl_is_numeric_type(type->e.resource.format)); -- assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); -+ VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); - if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) - { - vkd3d_string_buffer_printf(string, "Buffer"); - } - else - { -- assert(type->sampler_dim < ARRAY_SIZE(dimensions)); -+ VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); - vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); - } - if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -@@ -2407,16 +2561,33 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru - } - return string; - -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ vkd3d_string_buffer_printf(string, "ConstantBuffer"); -+ if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) -+ { -+ vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); -+ hlsl_release_string_buffer(ctx, inner_string); -+ } -+ return string; -+ -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - break; - } - -@@ 
-2513,19 +2684,21 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) - { - static const char * const names[] = - { -- [HLSL_IR_CALL ] = "HLSL_IR_CALL", -- [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", -- [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", -- [HLSL_IR_IF ] = "HLSL_IR_IF", -- [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", -- [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", -- [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", -- [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", -- [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", -- [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", -- [HLSL_IR_STORE ] = "HLSL_IR_STORE", -- [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", -- [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", -+ [HLSL_IR_CALL ] = "HLSL_IR_CALL", -+ [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", -+ [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", -+ [HLSL_IR_IF ] = "HLSL_IR_IF", -+ [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", -+ [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", -+ [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", -+ [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", -+ [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", -+ [HLSL_IR_RESOURCE_STORE ] = "HLSL_IR_RESOURCE_STORE", -+ [HLSL_IR_STRING_CONSTANT] = "HLSL_IR_STRING_CONSTANT", -+ [HLSL_IR_STORE ] = "HLSL_IR_STORE", -+ [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", -+ [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", -+ [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", - }; - - if (type >= ARRAY_SIZE(names)) -@@ -2544,7 +2717,7 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) - [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", - }; - -- assert(type < ARRAY_SIZE(names)); -+ VKD3D_ASSERT(type < ARRAY_SIZE(names)); - return names[type]; - } - -@@ -2634,7 +2807,7 @@ const char *debug_hlsl_writemask(unsigned int writemask) - char string[5]; - unsigned int i = 0, pos = 0; - -- assert(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); -+ VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); - - while (writemask) - { -@@ -2653,7 +2826,7 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) - char string[5]; - 
unsigned int i; - -- assert(size <= ARRAY_SIZE(components)); -+ VKD3D_ASSERT(size <= ARRAY_SIZE(components)); - for (i = 0; i < size; ++i) - string[i] = components[hlsl_swizzle_get_component(swizzle, i)]; - string[size] = 0; -@@ -2735,6 +2908,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - static const char *const op_names[] = - { - [HLSL_OP0_VOID] = "void", -+ [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", - - [HLSL_OP1_ABS] = "abs", - [HLSL_OP1_BIT_NOT] = "~", -@@ -2749,6 +2923,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP1_DSY_COARSE] = "dsy_coarse", - [HLSL_OP1_DSY_FINE] = "dsy_fine", - [HLSL_OP1_EXP2] = "exp2", -+ [HLSL_OP1_F16TOF32] = "f16tof32", - [HLSL_OP1_FLOOR] = "floor", - [HLSL_OP1_FRACT] = "fract", - [HLSL_OP1_LOG2] = "log2", -@@ -2790,6 +2965,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) - [HLSL_OP3_CMP] = "cmp", - [HLSL_OP3_DP2ADD] = "dp2add", - [HLSL_OP3_TERNARY] = "ternary", -+ [HLSL_OP3_MAD] = "mad", - }; - - return op_names[op]; -@@ -2875,7 +3051,7 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru - [HLSL_RESOURCE_RESINFO] = "resinfo", - }; - -- assert(load->load_type < ARRAY_SIZE(type_names)); -+ VKD3D_ASSERT(load->load_type < ARRAY_SIZE(type_names)); - vkd3d_string_buffer_printf(buffer, "%s(resource = ", type_names[load->load_type]); - dump_deref(buffer, &load->resource); - vkd3d_string_buffer_printf(buffer, ", sampler = "); -@@ -2929,6 +3105,11 @@ static void dump_ir_resource_store(struct vkd3d_string_buffer *buffer, const str - vkd3d_string_buffer_printf(buffer, ")"); - } - -+static void dump_ir_string(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_string_constant *string) -+{ -+ vkd3d_string_buffer_printf(buffer, "\"%s\"", debugstr_a(string->string)); -+} -+ - static void dump_ir_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) - { - vkd3d_string_buffer_printf(buffer, "= ("); -@@ -3048,6 +3229,10 
@@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, - dump_ir_resource_store(buffer, hlsl_ir_resource_store(instr)); - break; - -+ case HLSL_IR_STRING_CONSTANT: -+ dump_ir_string(buffer, hlsl_ir_string_constant(instr)); -+ break; -+ - case HLSL_IR_STORE: - dump_ir_store(buffer, hlsl_ir_store(instr)); - break; -@@ -3086,12 +3271,45 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl - vkd3d_string_buffer_cleanup(&buffer); - } - -+void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) -+{ -+ unsigned int k, component_count = hlsl_type_component_count(var->data_type); -+ struct vkd3d_string_buffer buffer; -+ -+ vkd3d_string_buffer_init(&buffer); -+ if (!var->default_values) -+ { -+ vkd3d_string_buffer_printf(&buffer, "var \"%s\" has no default values.\n", var->name); -+ vkd3d_string_buffer_trace(&buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+ return; -+ } -+ -+ vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); -+ for (k = 0; k < component_count; ++k) -+ { -+ bool is_string = var->default_values[k].string; -+ -+ if (k % 4 == 0 || is_string) -+ vkd3d_string_buffer_printf(&buffer, "\n "); -+ -+ if (is_string) -+ vkd3d_string_buffer_printf(&buffer, " %s", debugstr_a(var->default_values[k].string)); -+ else -+ vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].number.u); -+ } -+ vkd3d_string_buffer_printf(&buffer, "\n"); -+ -+ vkd3d_string_buffer_trace(&buffer); -+ vkd3d_string_buffer_cleanup(&buffer); -+} -+ - void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) - { - struct hlsl_src *src, *next; - -- assert(old->data_type->dimx == new->data_type->dimx); -- assert(old->data_type->dimy == new->data_type->dimy); -+ VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx); -+ VKD3D_ASSERT(old->data_type->dimy == new->data_type->dimy); - - LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) - { -@@ -3199,6 
+3417,12 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) - vkd3d_free(load); - } - -+static void free_ir_string_constant(struct hlsl_ir_string_constant *string) -+{ -+ vkd3d_free(string->string); -+ vkd3d_free(string); -+} -+ - static void free_ir_resource_store(struct hlsl_ir_resource_store *store) - { - hlsl_cleanup_deref(&store->resource); -@@ -3243,7 +3467,7 @@ static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *cons - - void hlsl_free_instr(struct hlsl_ir_node *node) - { -- assert(list_empty(&node->uses)); -+ VKD3D_ASSERT(list_empty(&node->uses)); - - switch (node->type) - { -@@ -3283,6 +3507,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) - free_ir_resource_load(hlsl_ir_resource_load(node)); - break; - -+ case HLSL_IR_STRING_CONSTANT: -+ free_ir_string_constant(hlsl_ir_string_constant(node)); -+ break; -+ - case HLSL_IR_RESOURCE_STORE: - free_ir_resource_store(hlsl_ir_resource_store(node)); - break; -@@ -3319,9 +3547,25 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) - void hlsl_cleanup_semantic(struct hlsl_semantic *semantic) - { - vkd3d_free((void *)semantic->name); -+ vkd3d_free((void *)semantic->raw_name); - memset(semantic, 0, sizeof(*semantic)); - } - -+bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src) -+{ -+ *dst = *src; -+ dst->name = dst->raw_name = NULL; -+ if (src->name && !(dst->name = hlsl_strdup(ctx, src->name))) -+ return false; -+ if (src->raw_name && !(dst->raw_name = hlsl_strdup(ctx, src->raw_name))) -+ { -+ hlsl_cleanup_semantic(dst); -+ return false; -+ } -+ -+ return true; -+} -+ - static void free_function_decl(struct hlsl_ir_function_decl *decl) - { - unsigned int i; -@@ -3711,14 +3955,23 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) - } - - ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); -+ ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, 
HLSL_TYPE_UINT, 1, 1); -+ ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); -+ hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); -- hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "ComputeShader", HLSL_CLASS_COMPUTE_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DomainShader", HLSL_CLASS_DOMAIN_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "HullShader", HLSL_CLASS_HULL_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "GeometryShader", HLSL_CLASS_GEOMETRY_SHADER)); -+ hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "BlendState", HLSL_CLASS_BLEND_STATE)); - - for (i = 0; i < ARRAY_SIZE(effect_types); ++i) - { -@@ -4049,6 +4302,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct - /* Save and restore everything that matters. 
- * Note that saving the scope stack is hard, and shouldn't be necessary. */ - -+ hlsl_push_scope(ctx); - ctx->scanner = NULL; - ctx->internal_func_name = internal_name->buffer; - ctx->cur_function = NULL; -@@ -4056,6 +4310,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct - ctx->scanner = saved_scanner; - ctx->internal_func_name = saved_internal_func_name; - ctx->cur_function = saved_cur_function; -+ hlsl_pop_scope(ctx); - if (ret) - { - ERR("Failed to compile intrinsic, error %u.\n", ret); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -index 27814f3a56f..22e25b23988 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h -@@ -78,10 +78,12 @@ enum hlsl_type_class - HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, - HLSL_CLASS_STRUCT, - HLSL_CLASS_ARRAY, -+ HLSL_CLASS_DEPTH_STENCIL_STATE, - HLSL_CLASS_DEPTH_STENCIL_VIEW, - HLSL_CLASS_EFFECT_GROUP, - HLSL_CLASS_PASS, - HLSL_CLASS_PIXEL_SHADER, -+ HLSL_CLASS_RASTERIZER_STATE, - HLSL_CLASS_RENDER_TARGET_VIEW, - HLSL_CLASS_SAMPLER, - HLSL_CLASS_STRING, -@@ -89,7 +91,14 @@ enum hlsl_type_class - HLSL_CLASS_TEXTURE, - HLSL_CLASS_UAV, - HLSL_CLASS_VERTEX_SHADER, -+ HLSL_CLASS_COMPUTE_SHADER, -+ HLSL_CLASS_DOMAIN_SHADER, -+ HLSL_CLASS_HULL_SHADER, -+ HLSL_CLASS_GEOMETRY_SHADER, -+ HLSL_CLASS_CONSTANT_BUFFER, -+ HLSL_CLASS_BLEND_STATE, - HLSL_CLASS_VOID, -+ HLSL_CLASS_NULL, - }; - - enum hlsl_base_type -@@ -222,6 +231,8 @@ struct hlsl_semantic - const char *name; - uint32_t index; - -+ /* Name exactly as it appears in the sources. */ -+ const char *raw_name; - /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ - bool reported_missing; - /* In case the variable or field that stores this semantic has already reported to use a -@@ -259,8 +270,20 @@ struct hlsl_struct_field - * struct. */ - struct hlsl_reg - { -- /* Index of the first register allocated. 
*/ -+ /* Register number of the first register allocated. */ - uint32_t id; -+ /* For descriptors (buffer, texture, sampler, UAV) this is the base binding -+ * index of the descriptor. -+ * For 5.1 and above descriptors have space and may be arrayed, in which -+ * case the array shares a single register ID but has a range of register -+ * indices, and "id" and "index" are as a rule not equal. -+ * For versions below 5.1, the register number for descriptors is the same -+ * as its external binding index, so only "index" is used, and "id" is -+ * ignored. -+ * For numeric registers "index" is not used. */ -+ uint32_t index; -+ /* Register space of a descriptor. Not used for numeric registers. */ -+ uint32_t space; - /* Number of registers to be allocated. - * Unlike the variable's type's regsize, it is not expressed in register components, but rather - * in whole registers, and may depend on which components are used within the shader. */ -@@ -289,6 +312,7 @@ enum hlsl_ir_node_type - HLSL_IR_JUMP, - HLSL_IR_RESOURCE_LOAD, - HLSL_IR_RESOURCE_STORE, -+ HLSL_IR_STRING_CONSTANT, - HLSL_IR_STORE, - HLSL_IR_SWIZZLE, - HLSL_IR_SWITCH, -@@ -371,6 +395,7 @@ struct hlsl_attribute - #define HLSL_STORAGE_LINEAR 0x00010000 - #define HLSL_MODIFIER_SINGLE 0x00020000 - #define HLSL_MODIFIER_EXPORT 0x00040000 -+#define HLSL_STORAGE_ANNOTATION 0x00080000 - - #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ - HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ -@@ -385,7 +410,7 @@ struct hlsl_attribute - - /* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a - * starting point of their allocation. They are available through the register(·) and the -- * packoffset(·) syntaxes, respectivelly. -+ * packoffset(·) syntaxes, respectively. - * The constant buffer offset is measured register components. 
*/ - struct hlsl_reg_reservation - { -@@ -396,6 +421,14 @@ struct hlsl_reg_reservation - unsigned int offset_index; - }; - -+union hlsl_constant_value_component -+{ -+ uint32_t u; -+ int32_t i; -+ float f; -+ double d; -+}; -+ - struct hlsl_ir_var - { - struct hlsl_type *data_type; -@@ -418,6 +451,17 @@ struct hlsl_ir_var - /* Scope that contains annotations for this variable. */ - struct hlsl_scope *annotations; - -+ /* Array of default values the variable was initialized with, one for each component. -+ * Only for variables that need it, such as uniforms and variables inside constant buffers. -+ * This pointer is NULL for others. */ -+ struct hlsl_default_value -+ { -+ /* Default value, in case the component is a string, otherwise it is NULL. */ -+ const char *string; -+ /* Default value, in case the component is a numeric value. */ -+ union hlsl_constant_value_component number; -+ } *default_values; -+ - /* A dynamic array containing the state block on the variable's declaration, if any. - * An array variable may contain multiple state blocks. - * A technique pass will always contain one. -@@ -460,6 +504,8 @@ struct hlsl_ir_var - uint32_t is_uniform : 1; - uint32_t is_param : 1; - uint32_t is_separated_resource : 1; -+ uint32_t is_synthetic : 1; -+ uint32_t has_explicit_bind_point : 1; - }; - - /* This struct is used to represent assignments in state block entries: -@@ -470,22 +516,31 @@ struct hlsl_ir_var - * name[lhs_index] = args[0] - * - or - - * name[lhs_index] = {args[0], args[1], ...}; -+ * -+ * This struct also represents function call syntax: -+ * name(args[0], args[1], ...) - */ - struct hlsl_state_block_entry - { -- /* For assignments, the name in the lhs. */ -+ /* Whether this entry is a function call. */ -+ bool is_function_call; -+ -+ /* For assignments, the name in the lhs. -+ * For functions, the name of the function. */ - char *name; - /* Resolved format-specific property identifier. 
*/ - unsigned int name_id; - -- /* Whether the lhs in the assignment is indexed and, in that case, its index. */ -+ /* For assignments, whether the lhs of an assignment is indexed and, in -+ * that case, its index. */ - bool lhs_has_index; - unsigned int lhs_index; - -- /* Instructions present in the rhs. */ -+ /* Instructions present in the rhs or the function arguments. */ - struct hlsl_block *instrs; - -- /* For assignments, arguments of the rhs initializer. */ -+ /* For assignments, arguments of the rhs initializer. -+ * For function calls, the arguments themselves. */ - struct hlsl_src *args; - unsigned int args_count; - }; -@@ -556,12 +611,21 @@ struct hlsl_ir_if - struct hlsl_block else_block; - }; - -+enum hlsl_ir_loop_unroll_type -+{ -+ HLSL_IR_LOOP_UNROLL, -+ HLSL_IR_LOOP_FORCE_UNROLL, -+ HLSL_IR_LOOP_FORCE_LOOP -+}; -+ - struct hlsl_ir_loop - { - struct hlsl_ir_node node; - /* loop condition is stored in the body (as "if (!condition) break;") */ - struct hlsl_block body; - unsigned int next_index; /* liveness index of the end of the loop */ -+ unsigned int unroll_limit; -+ enum hlsl_ir_loop_unroll_type unroll_type; - }; - - struct hlsl_ir_switch_case -@@ -583,13 +647,14 @@ struct hlsl_ir_switch - enum hlsl_ir_expr_op - { - HLSL_OP0_VOID, -+ HLSL_OP0_RASTERIZER_SAMPLE_COUNT, - - HLSL_OP1_ABS, - HLSL_OP1_BIT_NOT, - HLSL_OP1_CAST, - HLSL_OP1_CEIL, - HLSL_OP1_COS, -- HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ -+ HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */ - HLSL_OP1_DSX, - HLSL_OP1_DSX_COARSE, - HLSL_OP1_DSX_FINE, -@@ -597,6 +662,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_DSY_COARSE, - HLSL_OP1_DSY_FINE, - HLSL_OP1_EXP2, -+ HLSL_OP1_F16TOF32, - HLSL_OP1_FLOOR, - HLSL_OP1_FRACT, - HLSL_OP1_LOG2, -@@ -610,7 +676,7 @@ enum hlsl_ir_expr_op - HLSL_OP1_SAT, - HLSL_OP1_SIGN, - HLSL_OP1_SIN, -- HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ -+ HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ - HLSL_OP1_SQRT, - 
HLSL_OP1_TRUNC, - -@@ -643,6 +709,7 @@ enum hlsl_ir_expr_op - * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ - HLSL_OP3_CMP, - HLSL_OP3_TERNARY, -+ HLSL_OP3_MAD, - }; - - #define HLSL_MAX_OPERANDS 3 -@@ -775,18 +842,18 @@ struct hlsl_ir_constant - struct hlsl_ir_node node; - struct hlsl_constant_value - { -- union hlsl_constant_value_component -- { -- uint32_t u; -- int32_t i; -- float f; -- double d; -- } u[4]; -+ union hlsl_constant_value_component u[4]; - } value; - /* Constant register of type 'c' where the constant value is stored for SM1. */ - struct hlsl_reg reg; - }; - -+struct hlsl_ir_string_constant -+{ -+ struct hlsl_ir_node node; -+ char *string; -+}; -+ - /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, - * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ - struct hlsl_ir_stateblock_constant -@@ -811,6 +878,8 @@ struct hlsl_scope - bool loop; - /* The scope was created for the switch statement. */ - bool _switch; -+ /* The scope contains annotation variables. */ -+ bool annotations; - }; - - struct hlsl_profile_info -@@ -931,7 +1000,9 @@ struct hlsl_ctx - /* matrix[HLSL_TYPE_FLOAT][1][3] is a float4x2, i.e. dimx = 2, dimy = 4 */ - struct hlsl_type *matrix[HLSL_TYPE_LAST_SCALAR + 1][4][4]; - struct hlsl_type *sampler[HLSL_SAMPLER_DIM_LAST_SAMPLER + 1]; -+ struct hlsl_type *string; - struct hlsl_type *Void; -+ struct hlsl_type *null; - } builtin_types; - - /* List of the instruction nodes for initializing static variables. */ -@@ -948,6 +1019,8 @@ struct hlsl_ctx - } *regs; - size_t count, size; - } constant_defs; -+ /* 'c' registers where the constants expected by SM2 sincos are stored. */ -+ struct hlsl_reg d3dsincosconst1, d3dsincosconst2; - /* Number of temp. registers required for the shader to run, i.e. the largest temp register - * index that will be used in the output bytecode (+1). 
*/ - uint32_t temp_count; -@@ -994,85 +1067,91 @@ struct hlsl_resource_load_params - - static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_CALL); -+ VKD3D_ASSERT(node->type == HLSL_IR_CALL); - return CONTAINING_RECORD(node, struct hlsl_ir_call, node); - } - - static inline struct hlsl_ir_constant *hlsl_ir_constant(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(node->type == HLSL_IR_CONSTANT); - return CONTAINING_RECORD(node, struct hlsl_ir_constant, node); - } - -+static inline struct hlsl_ir_string_constant *hlsl_ir_string_constant(const struct hlsl_ir_node *node) -+{ -+ VKD3D_ASSERT(node->type == HLSL_IR_STRING_CONSTANT); -+ return CONTAINING_RECORD(node, struct hlsl_ir_string_constant, node); -+} -+ - static inline struct hlsl_ir_expr *hlsl_ir_expr(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_EXPR); -+ VKD3D_ASSERT(node->type == HLSL_IR_EXPR); - return CONTAINING_RECORD(node, struct hlsl_ir_expr, node); - } - - static inline struct hlsl_ir_if *hlsl_ir_if(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_IF); -+ VKD3D_ASSERT(node->type == HLSL_IR_IF); - return CONTAINING_RECORD(node, struct hlsl_ir_if, node); - } - - static inline struct hlsl_ir_jump *hlsl_ir_jump(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_JUMP); -+ VKD3D_ASSERT(node->type == HLSL_IR_JUMP); - return CONTAINING_RECORD(node, struct hlsl_ir_jump, node); - } - - static inline struct hlsl_ir_load *hlsl_ir_load(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_LOAD); -+ VKD3D_ASSERT(node->type == HLSL_IR_LOAD); - return CONTAINING_RECORD(node, struct hlsl_ir_load, node); - } - - static inline struct hlsl_ir_loop *hlsl_ir_loop(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_LOOP); -+ VKD3D_ASSERT(node->type == HLSL_IR_LOOP); - return CONTAINING_RECORD(node, struct hlsl_ir_loop, node); 
- } - - static inline struct hlsl_ir_resource_load *hlsl_ir_resource_load(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_RESOURCE_LOAD); -+ VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_LOAD); - return CONTAINING_RECORD(node, struct hlsl_ir_resource_load, node); - } - - static inline struct hlsl_ir_resource_store *hlsl_ir_resource_store(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_RESOURCE_STORE); -+ VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_STORE); - return CONTAINING_RECORD(node, struct hlsl_ir_resource_store, node); - } - - static inline struct hlsl_ir_store *hlsl_ir_store(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_STORE); -+ VKD3D_ASSERT(node->type == HLSL_IR_STORE); - return CONTAINING_RECORD(node, struct hlsl_ir_store, node); - } - - static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_SWIZZLE); -+ VKD3D_ASSERT(node->type == HLSL_IR_SWIZZLE); - return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); - } - - static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_INDEX); -+ VKD3D_ASSERT(node->type == HLSL_IR_INDEX); - return CONTAINING_RECORD(node, struct hlsl_ir_index, node); - } - - static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_SWITCH); -+ VKD3D_ASSERT(node->type == HLSL_IR_SWITCH); - return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); - } - - static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) - { -- assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); -+ VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); - return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); - } - -@@ -1249,6 +1328,13 @@ void hlsl_block_cleanup(struct hlsl_block *block); - bool hlsl_clone_block(struct hlsl_ctx *ctx, 
struct hlsl_block *dst_block, const struct hlsl_block *src_block); - - void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); -+void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); -+ -+bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, -+ const struct vkd3d_shader_location *loc); -+struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, -+ struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, -+ unsigned int lhs_index, unsigned int arg_index); - - void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -@@ -1259,7 +1345,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d - bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); - - void hlsl_cleanup_deref(struct hlsl_deref *deref); -+ - void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); -+bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src); - - void hlsl_cleanup_ir_switch_cases(struct list *cases); - void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); -@@ -1270,6 +1358,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr); - void hlsl_free_instr(struct hlsl_ir_node *node); - void hlsl_free_instr_list(struct list *list); - void hlsl_free_state_block(struct hlsl_state_block *state_block); -+void hlsl_free_state_block_entry(struct hlsl_state_block_entry *state_block_entry); - void hlsl_free_type(struct hlsl_type *type); - void hlsl_free_var(struct hlsl_ir_var *decl); - -@@ -1342,7 +1431,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); - struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, - struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); - struct 
hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, -- struct hlsl_block *block, const struct vkd3d_shader_location *loc); -+ struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, - const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, -@@ -1353,6 +1442,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned - struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, - struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, -+ const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, - struct hlsl_type *type, const struct vkd3d_shader_location *loc); - struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, -@@ -1361,8 +1452,10 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ - unsigned int sample_count); - struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, - struct hlsl_type *format, bool rasteriser_ordered); -+struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); - struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, - const struct vkd3d_shader_location *loc); -+struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc); - struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, - const struct vkd3d_shader_location *loc); - struct hlsl_ir_var 
*hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, -@@ -1432,10 +1525,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, - - D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); - D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); --bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, -- bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); --bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); --int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); -+bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, -+ unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); -+bool hlsl_sm1_usage_from_semantic(const char *semantic_name, -+ uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); -+ -+void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); -+int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, -+ const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, -+ struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, -+ struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); - - bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, - const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -index a5923d8bf8e..0c02b27817e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l -@@ -29,6 +29,8 @@ - - static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); - -+static void apply_escape_sequences(char *str); -+ - #define 
YY_USER_ACTION update_location(yyget_extra(yyscanner), yyget_lloc(yyscanner)); - - %} -@@ -49,11 +51,11 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); - RESERVED1 auto|catch|char|class|const_cast|delete|dynamic_cast|enum - RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public - RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try --RESERVED4 typename|union|unsigned|using|virtual -+RESERVED4 typename|union|using|virtual - - WS [ \t] - NEWLINE (\n)|(\r\n) --STRING \"[^\"]*\" -+STRING \"([^\"\\]|\\.)*\" - IDENTIFIER [A-Za-z_][A-Za-z0-9_]* - - ANY (.) -@@ -105,6 +107,7 @@ matrix {return KW_MATRIX; } - namespace {return KW_NAMESPACE; } - nointerpolation {return KW_NOINTERPOLATION; } - noperspective {return KW_NOPERSPECTIVE; } -+NULL {return KW_NULL; } - out {return KW_OUT; } - packoffset {return KW_PACKOFFSET; } - pass {return KW_PASS; } -@@ -142,6 +145,7 @@ stateblock {return KW_STATEBLOCK; } - stateblock_state {return KW_STATEBLOCK_STATE; } - static {return KW_STATIC; } - string {return KW_STRING; } -+String {return KW_STRING; } - struct {return KW_STRUCT; } - switch {return KW_SWITCH; } - tbuffer {return KW_TBUFFER; } -@@ -164,6 +168,7 @@ textureCUBE {return KW_TEXTURECUBE; } - TextureCubeArray {return KW_TEXTURECUBEARRAY; } - true {return KW_TRUE; } - typedef {return KW_TYPEDEF; } -+unsigned {return KW_UNSIGNED; } - uniform {return KW_UNIFORM; } - vector {return KW_VECTOR; } - VertexShader {return KW_VERTEXSHADER; } -@@ -197,7 +202,9 @@ while {return KW_WHILE; } - struct hlsl_ctx *ctx = yyget_extra(yyscanner); - - yylval->name = hlsl_strdup(ctx, yytext); -- if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) -+ if (hlsl_version_ge(ctx, 5, 1) && !strcmp(yytext, "ConstantBuffer")) -+ return KW_CONSTANTBUFFER; -+ else if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) - return VAR_IDENTIFIER; - else if (hlsl_get_type(ctx->cur_scope, yytext, 
true, true)) - return TYPE_IDENTIFIER; -@@ -205,6 +212,16 @@ while {return KW_WHILE; } - return NEW_IDENTIFIER; - } - -+{STRING} { -+ struct hlsl_ctx *ctx = yyget_extra(yyscanner); -+ char *string = hlsl_strdup(ctx, yytext + 1); -+ -+ string[strlen(string) - 1] = 0; -+ apply_escape_sequences(string); -+ yylval->name = string; -+ return STRING; -+ } -+ - [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[h|H|f|F]? { - yylval->floatval = atof(yytext); - return C_FLOAT; -@@ -289,6 +306,7 @@ while {return KW_WHILE; } - - BEGIN(pp_ignore); - string[strlen(string) - 1] = 0; -+ apply_escape_sequences(string); - yylval->name = string; - return STRING; - } -@@ -338,3 +356,115 @@ int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hls - yylex_destroy(ctx->scanner); - return ret; - } -+ -+static void apply_escape_sequences(char *str) -+{ -+ unsigned int i = 0, k = 0, r; -+ -+ while (str[i]) -+ { -+ unsigned char v = 0; -+ -+ if (str[i] != '\\') -+ { -+ str[k++] = str[i]; -+ ++i; -+ continue; -+ } -+ -+ ++i; -+ VKD3D_ASSERT(str[i]); -+ -+ if ('0' <= str[i] && str[i] <= '7') -+ { -+ /* octal, up to 3 digits. 
*/ -+ for (r = 0; r < 3; ++r) -+ { -+ char c = str[i]; -+ -+ if ('0' <= c && c <= '7') -+ { -+ v = v << 3; -+ v += c - '0'; -+ ++i; -+ } -+ else -+ break; -+ } -+ str[k++] = v; -+ continue; -+ } -+ -+ if (str[i] == 'x') -+ { -+ bool number = false; -+ -+ /* hexadecimal */ -+ ++i; -+ while (1) -+ { -+ char c = str[i]; -+ -+ if ('0' <= c && c <= '9') -+ { -+ v = v << 4; -+ v += c - '0'; -+ number = true; -+ ++i; -+ } -+ else if ('a' <= c && c <= 'f') -+ { -+ v = v << 4; -+ v += c - 'a' + 10; -+ number = true; -+ ++i; -+ } -+ else if ('A' <= c && c <= 'F') -+ { -+ v = v << 4; -+ v += c - 'A' + 10; -+ number = true; -+ ++i; -+ } -+ else -+ break; -+ } -+ if (number) -+ str[k++] = v; -+ else -+ str[k++] = 'x'; -+ continue; -+ } -+ -+ switch (str[i]) -+ { -+ case 'a': -+ str[k++] = '\a'; -+ break; -+ case 'b': -+ str[k++] = '\b'; -+ break; -+ case 'f': -+ str[k++] = '\f'; -+ break; -+ case 'n': -+ str[k++] = '\n'; -+ break; -+ case 'r': -+ str[k++] = '\r'; -+ break; -+ case 't': -+ str[k++] = '\t'; -+ break; -+ case 'v': -+ str[k++] = '\v'; -+ break; -+ -+ default: -+ str[k++] = str[i]; -+ break; -+ } -+ ++i; -+ } -+ str[k++] = '\0'; -+} -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -index 9c1bdef926d..3f319dea0d8 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y -@@ -34,6 +34,14 @@ struct parse_fields - size_t count, capacity; - }; - -+struct parse_initializer -+{ -+ struct hlsl_ir_node **args; -+ unsigned int args_count; -+ struct hlsl_block *instrs; -+ bool braces; -+}; -+ - struct parse_parameter - { - struct hlsl_type *type; -@@ -41,6 +49,7 @@ struct parse_parameter - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; - uint32_t modifiers; -+ struct parse_initializer initializer; - }; - - struct parse_colon_attribute -@@ -49,14 +58,6 @@ struct parse_colon_attribute - struct hlsl_reg_reservation reg_reservation; - }; - --struct parse_initializer --{ -- 
struct hlsl_ir_node **args; -- unsigned int args_count; -- struct hlsl_block *instrs; -- bool braces; --}; -- - struct parse_array_sizes - { - uint32_t *sizes; /* innermost first */ -@@ -73,6 +74,7 @@ struct parse_variable_def - struct hlsl_semantic semantic; - struct hlsl_reg_reservation reg_reservation; - struct parse_initializer initializer; -+ struct hlsl_scope *annotations; - - struct hlsl_type *basic_type; - uint32_t modifiers; -@@ -302,6 +304,26 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ - } - } - -+ if (src->class == HLSL_CLASS_NULL) -+ { -+ switch (dst->class) -+ { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: -+ case HLSL_CLASS_TEXTURE: -+ case HLSL_CLASS_UAV: -+ case HLSL_CLASS_VERTEX_SHADER: -+ return true; -+ default: -+ break; -+ } -+ } -+ - return hlsl_types_are_componentwise_equal(ctx, src, dst); - } - -@@ -329,6 +351,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl - if (hlsl_types_are_equal(src_type, dst_type)) - return node; - -+ if (src_type->class == HLSL_CLASS_NULL) -+ return node; -+ - if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) - { - unsigned int src_comp_count = hlsl_type_component_count(src_type); -@@ -342,11 +367,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl - broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1; - matrix_cast = !broadcast && dst_comp_count != src_comp_count - && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; -- assert(src_comp_count >= dst_comp_count || broadcast); -+ VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); - if (matrix_cast) - { -- assert(dst_type->dimx <= src_type->dimx); -- 
assert(dst_type->dimy <= src_type->dimy); -+ VKD3D_ASSERT(dst_type->dimx <= src_type->dimx); -+ VKD3D_ASSERT(dst_type->dimy <= src_type->dimy); - } - - if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) -@@ -573,12 +598,104 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); - } - -+static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx, -+ struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_default_value ret = {0}; -+ struct hlsl_ir_node *node; -+ struct hlsl_block expr; -+ struct hlsl_src src; -+ -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (node->type) -+ { -+ case HLSL_IR_CONSTANT: -+ case HLSL_IR_EXPR: -+ case HLSL_IR_STRING_CONSTANT: -+ case HLSL_IR_SWIZZLE: -+ case HLSL_IR_LOAD: -+ case HLSL_IR_INDEX: -+ continue; -+ case HLSL_IR_STORE: -+ if (hlsl_ir_store(node)->lhs.var->is_synthetic) -+ break; -+ /* fall-through */ -+ case HLSL_IR_CALL: -+ case HLSL_IR_IF: -+ case HLSL_IR_LOOP: -+ case HLSL_IR_JUMP: -+ case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_RESOURCE_STORE: -+ case HLSL_IR_SWITCH: -+ case HLSL_IR_STATEBLOCK_CONSTANT: -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Expected literal expression."); -+ break; -+ } -+ } -+ -+ if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -+ return ret; -+ hlsl_block_add_block(&expr, block); -+ -+ if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) -+ { -+ hlsl_block_cleanup(&expr); -+ return ret; -+ } -+ -+ /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ -+ hlsl_src_from_node(&src, node_from_block(&expr)); -+ hlsl_run_const_passes(ctx, &expr); -+ node = src.node; -+ hlsl_src_remove(&src); -+ -+ if (node->type == HLSL_IR_CONSTANT) -+ { -+ struct hlsl_ir_constant *constant = hlsl_ir_constant(node); -+ -+ ret.number = constant->value.u[0]; -+ } -+ else if (node->type == HLSL_IR_STRING_CONSTANT) -+ { -+ struct hlsl_ir_string_constant *string = hlsl_ir_string_constant(node); -+ -+ if (!(ret.string = vkd3d_strdup(string->string))) -+ return ret; -+ } -+ else if (node->type == HLSL_IR_STRING_CONSTANT) -+ { -+ hlsl_fixme(ctx, &node->loc, "Evaluate string constants as static expressions."); -+ } -+ else -+ { -+ hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Failed to evaluate constant expression."); -+ } -+ -+ hlsl_block_cleanup(&expr); -+ -+ return ret; -+} -+ -+static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_default_value res; -+ -+ res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); -+ VKD3D_ASSERT(!res.string); -+ return res.number.u; -+} -+ - static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, - struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) - { -+ enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; -+ unsigned int i, unroll_limit = 0; - struct hlsl_ir_node *loop; -- unsigned int i; - - if (attribute_list_has_duplicates(attributes)) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); -@@ -591,18 +708,29 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - const struct hlsl_attribute *attr = attributes->attrs[i]; - if (!strcmp(attr->name, "unroll")) - { -- if (attr->args_count) -+ 
if (attr->args_count > 1) - { -- hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); -+ hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, -+ "Ignoring 'unroll' attribute with more than 1 argument."); -+ continue; - } -- else -+ -+ if (attr->args_count == 1) - { -- hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); -+ struct hlsl_block expr; -+ hlsl_block_init(&expr); -+ if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) -+ return NULL; -+ -+ unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); -+ hlsl_block_cleanup(&expr); - } -+ -+ unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; - } - else if (!strcmp(attr->name, "loop")) - { -- /* TODO: this attribute will be used to disable unrolling, once it's implememented. */ -+ unroll_type = HLSL_IR_LOOP_FORCE_LOOP; - } - else if (!strcmp(attr->name, "fastopt") - || !strcmp(attr->name, "allow_uav_condition")) -@@ -631,7 +759,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, - else - list_move_head(&body->instrs, &cond->instrs); - -- if (!(loop = hlsl_new_loop(ctx, body, loc))) -+ if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) - goto oom; - hlsl_block_add_instr(init, loop); - -@@ -663,7 +791,7 @@ static void cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) - { - unsigned int i = 0; - -- assert(attr_list); -+ VKD3D_ASSERT(attr_list); - for (i = 0; i < attr_list->count; ++i) - hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); - vkd3d_free(attr_list->attrs); -@@ -823,7 +951,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, st - { - struct hlsl_ir_node *index, *c; - -- assert(idx < record->data_type->e.record.field_count); -+ VKD3D_ASSERT(idx < record->data_type->e.record.field_count); - - if (!(c = hlsl_new_uint_constant(ctx, idx, loc))) - return false; -@@ -953,7 
+1081,7 @@ static void free_parse_variable_def(struct parse_variable_def *v) - vkd3d_free(v->arrays.sizes); - vkd3d_free(v->name); - hlsl_cleanup_semantic(&v->semantic); -- assert(!v->state_blocks); -+ VKD3D_ASSERT(!v->state_blocks); - vkd3d_free(v); - } - -@@ -964,7 +1092,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - size_t i = 0; - - if (type->class == HLSL_CLASS_MATRIX) -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - memset(fields, 0, sizeof(*fields)); - fields->count = list_count(defs); -@@ -1013,6 +1141,10 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, - field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); - } - } -+ -+ if (hlsl_version_ge(ctx, 5, 1) && field->type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(field->type)) -+ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); -+ - vkd3d_free(v->arrays.sizes); - field->loc = v->loc; - field->name = v->name; -@@ -1094,13 +1226,16 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, - return true; - } - -+static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, -+ struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src); -+ - static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, - struct parse_parameter *param, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_var *var; - - if (param->type->class == HLSL_CLASS_MATRIX) -- assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -@@ -1110,11 +1245,52 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, 
struct hlsl_func_parameters - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() is not allowed on function parameters."); - -+ if (parameters->count && parameters->vars[parameters->count - 1]->default_values -+ && !param->initializer.args_count) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, -+ "Missing default value for parameter '%s'.", param->name); -+ -+ if (param->initializer.args_count && (param->modifiers & HLSL_STORAGE_OUT)) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, -+ "Output parameter '%s' has a default value.", param->name); -+ - if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, - ¶m->reg_reservation))) - return false; - var->is_param = 1; - -+ if (param->initializer.args_count) -+ { -+ unsigned int component_count = hlsl_type_component_count(param->type); -+ unsigned int store_index = 0; -+ unsigned int size, i; -+ -+ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) -+ return false; -+ -+ if (!param->initializer.braces) -+ { -+ if (!(add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc))) -+ return false; -+ -+ param->initializer.args[0] = node_from_block(param->initializer.instrs); -+ } -+ -+ size = initializer_size(¶m->initializer); -+ if (component_count != size) -+ { -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u components in initializer, but got %u.", component_count, size); -+ } -+ -+ for (i = 0; i < param->initializer.args_count; ++i) -+ { -+ initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]); -+ } -+ -+ free_parse_initializer(¶m->initializer); -+ } -+ - if (!hlsl_add_var(ctx, var, false)) - { - hlsl_free_var(var); -@@ -1210,12 +1386,42 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl - return true; - } - 
--static bool parse_reservation_index(const char *string, char *type, uint32_t *index) -+static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, -+ struct hlsl_reg_reservation *reservation) - { -- if (!sscanf(string + 1, "%u", index)) -- return false; -+ char *endptr; -+ -+ reservation->reg_type = ascii_tolower(string[0]); -+ -+ /* Prior to SM5.1, fxc simply ignored bracket offsets for 'b' types. */ -+ if (reservation->reg_type == 'b' && hlsl_version_lt(ctx, 5, 1)) -+ { -+ bracket_offset = 0; -+ } -+ -+ if (string[1] == '\0') -+ { -+ reservation->reg_index = bracket_offset; -+ return true; -+ } -+ -+ reservation->reg_index = strtoul(string + 1, &endptr, 10) + bracket_offset; -+ -+ if (*endptr) -+ { -+ /* fxc for SM >= 4 treats all parse failures for 'b' types as successes, -+ * setting index to -1. It will later fail while validating slot limits. */ -+ if (reservation->reg_type == 'b' && hlsl_version_ge(ctx, 4, 0)) -+ { -+ reservation->reg_index = -1; -+ return true; -+ } -+ -+ /* All other types tolerate leftover characters. 
*/ -+ if (endptr == string + 1) -+ return false; -+ } - -- *type = ascii_tolower(string[0]); - return true; - } - -@@ -1286,72 +1492,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * - return block; - } - --static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, -- const struct vkd3d_shader_location *loc) --{ -- struct hlsl_ir_constant *constant; -- struct hlsl_ir_node *node; -- struct hlsl_block expr; -- unsigned int ret = 0; -- struct hlsl_src src; -- -- LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -- { -- switch (node->type) -- { -- case HLSL_IR_CONSTANT: -- case HLSL_IR_EXPR: -- case HLSL_IR_SWIZZLE: -- case HLSL_IR_LOAD: -- case HLSL_IR_INDEX: -- continue; -- case HLSL_IR_CALL: -- case HLSL_IR_IF: -- case HLSL_IR_LOOP: -- case HLSL_IR_JUMP: -- case HLSL_IR_RESOURCE_LOAD: -- case HLSL_IR_RESOURCE_STORE: -- case HLSL_IR_STORE: -- case HLSL_IR_SWITCH: -- case HLSL_IR_STATEBLOCK_CONSTANT: -- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Expected literal expression."); -- } -- } -- -- if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) -- return 0; -- hlsl_block_add_block(&expr, block); -- -- if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), -- hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) -- { -- hlsl_block_cleanup(&expr); -- return 0; -- } -- -- /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ -- hlsl_src_from_node(&src, node_from_block(&expr)); -- hlsl_run_const_passes(ctx, &expr); -- node = src.node; -- hlsl_src_remove(&src); -- -- if (node->type == HLSL_IR_CONSTANT) -- { -- constant = hlsl_ir_constant(node); -- ret = constant->value.u[0].u; -- } -- else -- { -- hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -- "Failed to evaluate constant expression."); -- } -- -- hlsl_block_cleanup(&expr); -- -- return ret; --} -- - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) - { - /* Scalar vars can be converted to pretty much everything */ -@@ -1759,49 +1899,51 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls - return add_expr(ctx, instrs, op, args, ret_type, loc); - } - --static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, -- struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -+static struct hlsl_ir_node *add_binary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, -+ struct hlsl_ir_node *lhs, struct hlsl_ir_node *rhs, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); -- -- hlsl_block_add_block(block1, block2); -- destroy_block(block2); -- - switch (op) - { - case HLSL_OP2_ADD: - case HLSL_OP2_DIV: - case HLSL_OP2_MOD: - case HLSL_OP2_MUL: -- add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); -- break; -+ return add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, loc); - - case HLSL_OP2_BIT_AND: - case HLSL_OP2_BIT_OR: - case HLSL_OP2_BIT_XOR: -- add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); -- break; -+ return add_binary_bitwise_expr(ctx, block, op, lhs, rhs, loc); - - case HLSL_OP2_LESS: - case HLSL_OP2_GEQUAL: - case HLSL_OP2_EQUAL: - case HLSL_OP2_NEQUAL: -- add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); -- break; -+ return 
add_binary_comparison_expr(ctx, block, op, lhs, rhs, loc); - - case HLSL_OP2_LOGIC_AND: - case HLSL_OP2_LOGIC_OR: -- add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); -- break; -+ return add_binary_logical_expr(ctx, block, op, lhs, rhs, loc); - - case HLSL_OP2_LSHIFT: - case HLSL_OP2_RSHIFT: -- add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); -- break; -+ return add_binary_shift_expr(ctx, block, op, lhs, rhs, loc); - - default: - vkd3d_unreachable(); - } -+} -+ -+static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, -+ struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); -+ -+ hlsl_block_add_block(block1, block2); -+ destroy_block(block2); -+ -+ if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL) -+ return NULL; - - return block1; - } -@@ -1862,12 +2004,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned - return true; - } - -+static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) -+{ -+ /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. -+ * components are indexed by their sources. i.e. the first component comes from the first -+ * component of the rhs. */ -+ unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; -+ -+ /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ -+ for (i = 0; i < 4; ++i) -+ { -+ if (*writemask & (1 << i)) -+ { -+ unsigned int s = (*swizzle >> (i * 8)) & 0xff; -+ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int idx = x + y * 4; -+ new_swizzle |= s << (bit++ * 8); -+ if (new_writemask & (1 << idx)) -+ return false; -+ new_writemask |= 1 << idx; -+ } -+ } -+ width = bit; -+ -+ /* Then we invert the swizzle. 
The resulting swizzle has 2 bits per component, because it's for the -+ * incoming vector. */ -+ bit = 0; -+ for (i = 0; i < 16; ++i) -+ { -+ for (j = 0; j < width; ++j) -+ { -+ unsigned int s = (new_swizzle >> (j * 8)) & 0xff; -+ unsigned int x = s & 0xf, y = (s >> 4) & 0xf; -+ unsigned int idx = x + y * 4; -+ if (idx == i) -+ inverted |= j << (bit++ * 2); -+ } -+ } -+ -+ *swizzle = inverted; -+ *writemask = new_writemask; -+ *ret_width = width; -+ return true; -+} -+ - static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, - enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) - { - struct hlsl_type *lhs_type = lhs->data_type; - struct hlsl_ir_node *copy; -- unsigned int writemask = 0; -+ unsigned int writemask = 0, width = 0; -+ bool matrix_writemask = false; - - if (assign_op == ASSIGN_OP_SUB) - { -@@ -1879,13 +2066,16 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - { - enum hlsl_ir_expr_op op = op_from_assignment(assign_op); - -- assert(op); -- if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) -+ VKD3D_ASSERT(op); -+ if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) - return NULL; - } - - if (hlsl_is_numeric_type(lhs_type)) -+ { - writemask = (1 << lhs_type->dimx) - 1; -+ width = lhs_type->dimx; -+ } - - if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) - return NULL; -@@ -1902,12 +2092,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); - struct hlsl_ir_node *new_swizzle; - uint32_t s = swizzle->swizzle; -- unsigned int width; - -- if (lhs->data_type->class == HLSL_CLASS_MATRIX) -- hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); -+ VKD3D_ASSERT(!matrix_writemask); - -- if (!invert_swizzle(&s, &writemask, &width)) -+ if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) 
-+ { -+ if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) -+ { -+ hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); -+ return NULL; -+ } -+ if (!invert_swizzle_matrix(&s, &writemask, &width)) -+ { -+ hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); -+ return NULL; -+ } -+ matrix_writemask = true; -+ } -+ else if (!invert_swizzle(&s, &writemask, &width)) - { - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); - return NULL; -@@ -1947,7 +2149,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - return NULL; - - resource_type = hlsl_deref_get_type(ctx, &resource_deref); -- assert(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); -+ VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); - - if (resource_type->class != HLSL_CLASS_UAV) - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -@@ -1955,13 +2157,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - - dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - -- if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) -+ if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) - hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, - "Resource store expressions must write to all components."); - -- assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -- assert(coords->data_type->dimx == dim_count); -+ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(coords->data_type->dimx == dim_count); - - if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, 
&lhs->loc))) - { -@@ -1971,12 +2173,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo - hlsl_block_add_instr(block, store); - hlsl_cleanup_deref(&resource_deref); - } -+ else if (matrix_writemask) -+ { -+ struct hlsl_deref deref; -+ unsigned int i, j, k = 0; -+ -+ hlsl_init_deref_from_index_chain(ctx, &deref, lhs); -+ -+ for (i = 0; i < lhs->data_type->dimy; ++i) -+ { -+ for (j = 0; j < lhs->data_type->dimx; ++j) -+ { -+ struct hlsl_ir_node *load; -+ struct hlsl_block store_block; -+ const unsigned int idx = i * 4 + j; -+ const unsigned int component = i * lhs->data_type->dimx + j; -+ -+ if (!(writemask & (1 << idx))) -+ continue; -+ -+ if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) -+ { -+ hlsl_cleanup_deref(&deref); -+ return NULL; -+ } -+ -+ if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) -+ { -+ hlsl_cleanup_deref(&deref); -+ return NULL; -+ } -+ hlsl_block_add_block(block, &store_block); -+ } -+ } -+ -+ hlsl_cleanup_deref(&deref); -+ } - else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) - { - struct hlsl_ir_index *row = hlsl_ir_index(lhs); - struct hlsl_ir_node *mat = row->val.node; - unsigned int i, k = 0; - -+ VKD3D_ASSERT(!matrix_writemask); -+ - for (i = 0; i < mat->data_type->dimx; ++i) - { - struct hlsl_ir_node *cell, *load, *store, *c; -@@ -2067,6 +2307,55 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d - return true; - } - -+/* For some reason, for matrices, values from default value initializers end up in different -+ * components than from regular initializers. Default value initializers fill the matrix in -+ * vertical reading order (left-to-right top-to-bottom) instead of regular reading order -+ * (top-to-bottom left-to-right), so they have to be adjusted. -+ * An exception is that the order of matrix initializers for function parameters are row-major -+ * (top-to-bottom left-to-right). 
*/ -+static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, -+ struct hlsl_type *type, unsigned int index) -+{ -+ unsigned int element_comp_count, element, x, y, i; -+ unsigned int base = 0; -+ -+ if (ctx->profile->major_version < 4) -+ return index; -+ -+ if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) -+ return index; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_MATRIX: -+ x = index / type->dimy; -+ y = index % type->dimy; -+ return y * type->dimx + x; -+ -+ case HLSL_CLASS_ARRAY: -+ element_comp_count = hlsl_type_component_count(type->e.array.type); -+ element = index / element_comp_count; -+ base = element * element_comp_count; -+ return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); -+ -+ case HLSL_CLASS_STRUCT: -+ for (i = 0; i < type->e.record.field_count; ++i) -+ { -+ struct hlsl_type *field_type = type->e.record.fields[i].type; -+ -+ element_comp_count = hlsl_type_component_count(field_type); -+ if (index - base < element_comp_count) -+ return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); -+ base += element_comp_count; -+ } -+ break; -+ -+ default: -+ return index; -+ } -+ vkd3d_unreachable(); -+} -+ - static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, - struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) - { -@@ -2087,12 +2376,33 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i - - dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - -- if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -- return; -+ if (dst->default_values) -+ { -+ struct hlsl_default_value default_value = {0}; -+ unsigned int dst_index; - -- if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -- return; -- hlsl_block_add_block(instrs, &block); -+ if 
(!hlsl_clone_block(ctx, &block, instrs)) -+ return; -+ default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); -+ -+ if (dst->is_param) -+ dst_index = *store_index; -+ else -+ dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); -+ -+ dst->default_values[dst_index] = default_value; -+ -+ hlsl_block_cleanup(&block); -+ } -+ else -+ { -+ if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) -+ return; -+ -+ if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) -+ return; -+ hlsl_block_add_block(instrs, &block); -+ } - - ++*store_index; - } -@@ -2171,16 +2481,17 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - struct hlsl_semantic new_semantic; - uint32_t modifiers = v->modifiers; - bool unbounded_res_array = false; -+ bool constant_buffer = false; - struct hlsl_ir_var *var; - struct hlsl_type *type; - bool local = true; - char *var_name; - unsigned int i; - -- assert(basic_type); -+ VKD3D_ASSERT(basic_type); - - if (basic_type->class == HLSL_CLASS_MATRIX) -- assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - - type = basic_type; - -@@ -2190,6 +2501,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); - } - -+ if (type->class == HLSL_CLASS_CONSTANT_BUFFER) -+ { -+ type = type->e.resource.format; -+ constant_buffer = true; -+ } -+ - if (unbounded_res_array) - { - if (v->arrays.count == 1) -@@ -2246,17 +2563,22 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - } - } - -+ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -+ { -+ /* SM 5.1/6.x descriptor arrays act differently from previous versions. 
-+ * Not only are they treated as a single object in reflection, but they -+ * act as a single component for the purposes of assignment and -+ * initialization. */ -+ hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); -+ } -+ - if (!(var_name = vkd3d_strdup(v->name))) - return; - -- new_semantic = v->semantic; -- if (v->semantic.name) -+ if (!hlsl_clone_semantic(ctx, &new_semantic, &v->semantic)) - { -- if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) -- { -- vkd3d_free(var_name); -- return; -- } -+ vkd3d_free(var_name); -+ return; - } - - if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) -@@ -2266,7 +2588,18 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - return; - } - -- var->buffer = ctx->cur_buffer; -+ var->annotations = v->annotations; -+ -+ if (constant_buffer && ctx->cur_scope == ctx->globals) -+ { -+ if (!(var_name = vkd3d_strdup(v->name))) -+ return; -+ var->buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, var_name, modifiers, &v->reg_reservation, NULL, &v->loc); -+ } -+ else -+ { -+ var->buffer = ctx->cur_buffer; -+ } - - if (var->buffer == ctx->globals_buffer) - { -@@ -2289,8 +2622,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - -- if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) -+ && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) -+ { - check_invalid_object_fields(ctx, var); -+ } - - if ((func = hlsl_get_first_func_decl(ctx, var->name))) - { -@@ -2323,6 +2659,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, - "Const variable \"%s\" is missing an initializer.", var->name); - } -+ -+ if 
(var->annotations) -+ { -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, -+ "Annotations are only allowed for objects in the global scope."); -+ } - } - - if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) -@@ -2348,6 +2690,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - { - struct parse_variable_def *v, *v_next; - struct hlsl_block *initializers; -+ unsigned int component_count; - struct hlsl_ir_var *var; - struct hlsl_type *type; - -@@ -2371,6 +2714,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - } - - type = var->data_type; -+ component_count = hlsl_type_component_count(type); - - var->state_blocks = v->state_blocks; - var->state_block_count = v->state_block_count; -@@ -2379,51 +2723,78 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var - v->state_block_capacity = 0; - v->state_blocks = NULL; - -- if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) -+ if (var->state_blocks && component_count != var->state_block_count) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u state blocks, but got %u.", -- hlsl_type_component_count(type), var->state_block_count); -+ "Expected %u state blocks, but got %u.", component_count, var->state_block_count); - free_parse_variable_def(v); - continue; - } - - if (v->initializer.args_count) - { -- if (v->initializer.braces) -+ unsigned int store_index = 0; -+ bool is_default_values_initializer; -+ unsigned int size, k; -+ -+ is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) -+ || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) -+ || ctx->cur_scope->annotations; -+ -+ if (is_default_values_initializer) - { -- unsigned int size = initializer_size(&v->initializer); -- unsigned int store_index = 0; -- unsigned int k; -+ /* Default values might have been allocated 
already for another variable of the same name, -+ in the same scope. */ -+ if (var->default_values) -+ { -+ free_parse_variable_def(v); -+ continue; -+ } - -- if (hlsl_type_component_count(type) != size) -+ if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) - { -- hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -- "Expected %u components in initializer, but got %u.", -- hlsl_type_component_count(type), size); - free_parse_variable_def(v); - continue; - } -+ } - -- for (k = 0; k < v->initializer.args_count; ++k) -+ if (!v->initializer.braces) -+ { -+ if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) - { -- initialize_var_components(ctx, v->initializer.instrs, var, -- &store_index, v->initializer.args[k]); -+ free_parse_variable_def(v); -+ continue; - } -+ -+ v->initializer.args[0] = node_from_block(v->initializer.instrs); - } -- else -+ -+ size = initializer_size(&v->initializer); -+ if (component_count != size) - { -- struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); -+ hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, -+ "Expected %u components in initializer, but got %u.", component_count, size); -+ free_parse_variable_def(v); -+ continue; -+ } - -- assert(v->initializer.args_count == 1); -- hlsl_block_add_instr(v->initializer.instrs, &load->node); -- add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); -+ for (k = 0; k < v->initializer.args_count; ++k) -+ { -+ initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); - } - -- if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ if (is_default_values_initializer) -+ { -+ hlsl_dump_var_default_values(var); -+ } -+ else if (var->storage_modifiers & HLSL_STORAGE_STATIC) -+ { - hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); -+ } - else -+ { - 
hlsl_block_add_block(initializers, v->initializer.instrs); -+ } - } - else if (var->storage_modifiers & HLSL_STORAGE_STATIC) - { -@@ -2469,14 +2840,18 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, - { - unsigned int i; - -- if (decl->parameters.count != args->args_count) -+ if (decl->parameters.count < args->args_count) - return false; - -- for (i = 0; i < decl->parameters.count; ++i) -+ for (i = 0; i < args->args_count; ++i) - { - if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type)) - return false; - } -+ -+ if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values) -+ return false; -+ - return true; - } - -@@ -2519,11 +2894,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - const struct parse_initializer *args, const struct vkd3d_shader_location *loc) - { - struct hlsl_ir_node *call; -- unsigned int i; -+ unsigned int i, j; - -- assert(args->args_count == func->parameters.count); -+ VKD3D_ASSERT(args->args_count <= func->parameters.count); - -- for (i = 0; i < func->parameters.count; ++i) -+ for (i = 0; i < args->args_count; ++i) - { - struct hlsl_ir_var *param = func->parameters.vars[i]; - struct hlsl_ir_node *arg = args->args[i]; -@@ -2548,11 +2923,43 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu - } - } - -+ /* Add default values for the remaining parameters. 
*/ -+ for (i = args->args_count; i < func->parameters.count; ++i) -+ { -+ struct hlsl_ir_var *param = func->parameters.vars[i]; -+ unsigned int comp_count = hlsl_type_component_count(param->data_type); -+ struct hlsl_deref param_deref; -+ -+ VKD3D_ASSERT(param->default_values); -+ -+ hlsl_init_simple_deref_from_var(¶m_deref, param); -+ -+ for (j = 0; j < comp_count; ++j) -+ { -+ struct hlsl_type *type = hlsl_type_get_component_type(ctx, param->data_type, j); -+ struct hlsl_constant_value value; -+ struct hlsl_ir_node *comp; -+ struct hlsl_block store_block; -+ -+ if (!param->default_values[j].string) -+ { -+ value.u[0] = param->default_values[j].number; -+ if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) -+ return false; -+ hlsl_block_add_instr(args->instrs, comp); -+ -+ if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) -+ return false; -+ hlsl_block_add_block(args->instrs, &store_block); -+ } -+ } -+ } -+ - if (!(call = hlsl_new_call(ctx, func, loc))) - return false; - hlsl_block_add_instr(args->instrs, call); - -- for (i = 0; i < func->parameters.count; ++i) -+ for (i = 0; i < args->args_count; ++i) - { - struct hlsl_ir_var *param = func->parameters.vars[i]; - struct hlsl_ir_node *arg = args->args[i]; -@@ -2699,6 +3106,19 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, - return convert_args(ctx, params, type, loc); - } - -+static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_type *type; -+ -+ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) -+ return false; -+ -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); -+ -+ return convert_args(ctx, params, type, loc); -+} -+ - static bool intrinsic_abs(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -2933,6 +3353,29 @@ 
static bool intrinsic_asfloat(struct hlsl_ctx *ctx, - return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); - } - -+static bool intrinsic_asint(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *data_type; -+ -+ data_type = params->args[0]->data_type; -+ if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) -+ { -+ struct vkd3d_string_buffer *string; -+ -+ if ((string = hlsl_type_to_string(ctx, data_type))) -+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Wrong argument type of asint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", -+ string->buffer); -+ hlsl_release_string_buffer(ctx, string); -+ } -+ data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_INT); -+ -+ operands[0] = params->args[0]; -+ return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); -+} -+ - static bool intrinsic_asuint(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3331,26 +3774,69 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, - if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) - return false; - -- /* 1/ln(2) */ -- if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) -+ /* 1/ln(2) */ -+ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, coeff); -+ -+ if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); -+} -+ -+static bool intrinsic_exp2(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = 
intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); -+} -+ -+static bool intrinsic_faceforward(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_function_decl *func; -+ struct hlsl_type *type; -+ char *body; -+ -+ static const char template[] = -+ "%s faceforward(%s n, %s i, %s ng)\n" -+ "{\n" -+ " return dot(i, ng) < 0 ? n : -n;\n" -+ "}\n"; -+ -+ if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) - return false; -- hlsl_block_add_instr(params->instrs, coeff); -+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - -- if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) -+ if (!(body = hlsl_sprintf_alloc(ctx, template, -+ type->name, type->name, type->name, type->name))) -+ return false; -+ func = hlsl_compile_internal_function(ctx, "faceforward", body); -+ vkd3d_free(body); -+ if (!func) - return false; - -- return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); -+ return add_user_call(ctx, func, params, loc); - } - --static bool intrinsic_exp2(struct hlsl_ctx *ctx, -+static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -- struct hlsl_ir_node *arg; -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_type *type; - -- if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) - return false; - -- return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); -+ type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_FLOAT); -+ -+ operands[0] = params->args[0]; -+ return add_expr(ctx, params->instrs, 
HLSL_OP1_F16TOF32, operands, type, loc); - } - - static bool intrinsic_floor(struct hlsl_ctx *ctx, -@@ -3646,12 +4132,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, - } - else if (vect_count == 1) - { -- assert(matrix_type->dimx == 1 || matrix_type->dimy == 1); -+ VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1); - ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy); - } - else - { -- assert(matrix_type->dimx == 1 && matrix_type->dimy == 1); -+ VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1); - ret_type = hlsl_get_scalar_type(ctx, base); - } - -@@ -3764,6 +4250,17 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, - return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, rad, loc); - } - -+static bool intrinsic_rcp(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node *arg; -+ -+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) -+ return false; -+ -+ return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RCP, arg, loc); -+} -+ - static bool intrinsic_reflect(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -3814,7 +4311,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, - return false; - } - -- assert(params->args_count == 3); -+ VKD3D_ASSERT(params->args_count == 3); - mut_params = *params; - mut_params.args_count = 2; - if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) -@@ -4032,6 +4529,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, - static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, - const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) - { -+ unsigned int sampler_dim = hlsl_sampler_dim_count(dim); - struct hlsl_resource_load_params load_params = { 0 }; - const struct hlsl_type 
*sampler_type; - struct hlsl_ir_node *coords, *sample; -@@ -4043,11 +4541,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - return false; - } - -- if (params->args_count == 4) -- { -- hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); -- } -- - sampler_type = params->args[0]->data_type; - if (sampler_type->class != HLSL_CLASS_SAMPLER - || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) -@@ -4061,18 +4554,22 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - hlsl_release_string_buffer(ctx, string); - } - -- if (!strcmp(name, "tex2Dlod")) -+ if (!strcmp(name, "tex2Dbias") -+ || !strcmp(name, "tex2Dlod")) - { - struct hlsl_ir_node *lod, *c; - -- load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ if (!strcmp(name, "tex2Dlod")) -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD; -+ else -+ load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; - -- if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) -+ if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) - return false; - hlsl_block_add_instr(params->instrs, c); - -- if (!(coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, -- hlsl_sampler_dim_count(dim)), loc))) -+ if (!(coords = add_implicit_conversion(ctx, params->instrs, c, -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - { - return false; - } -@@ -4099,14 +4596,13 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - - if (hlsl_version_ge(ctx, 4, 0)) - { -- unsigned int count = hlsl_sampler_dim_count(dim); - struct hlsl_ir_node *divisor; - -- if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), count, coords, loc))) -+ if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) - return false; - 
hlsl_block_add_instr(params->instrs, divisor); - -- if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), count, coords, loc))) -+ if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) - return false; - hlsl_block_add_instr(params->instrs, coords); - -@@ -4120,12 +4616,34 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * - load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; - } - } -+ else if (params->args_count == 4) /* Gradient sampling. */ -+ { -+ if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) -+ { -+ return false; -+ } -+ -+ load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; -+ } - else - { - load_params.type = HLSL_RESOURCE_SAMPLE; - - if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], -- hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) -+ hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) - { - return false; - } -@@ -4181,12 +4699,30 @@ static bool intrinsic_tex1D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); - } - -+static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); -+} -+ - static bool intrinsic_tex2D(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { - return intrinsic_tex(ctx, params, loc, 
"tex2D", HLSL_SAMPLER_DIM_2D); - } - -+static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); -+} -+ -+static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D); -+} -+ - static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4205,6 +4741,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); - } - -+static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); -+} -+ - static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4217,6 +4759,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, - return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); - } - -+static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); -+} -+ - static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, - const struct parse_initializer *params, const struct vkd3d_shader_location *loc) - { -@@ -4336,6 +4884,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, - return true; - } - -+static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, -+ const struct parse_initializer *params, const struct vkd3d_shader_location *loc) -+{ -+ struct hlsl_ir_node 
*operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *expr; -+ -+ if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, -+ operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) -+ return false; -+ hlsl_block_add_instr(params->instrs, expr); -+ -+ return true; -+} -+ - static const struct intrinsic_function - { - const char *name; -@@ -4348,12 +4910,14 @@ intrinsic_functions[] = - { - /* Note: these entries should be kept in alphabetical order. */ - {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, -+ {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, - {"abs", 1, true, intrinsic_abs}, - {"acos", 1, true, intrinsic_acos}, - {"all", 1, true, intrinsic_all}, - {"any", 1, true, intrinsic_any}, - {"asfloat", 1, true, intrinsic_asfloat}, - {"asin", 1, true, intrinsic_asin}, -+ {"asint", 1, true, intrinsic_asint}, - {"asuint", -1, true, intrinsic_asuint}, - {"atan", 1, true, intrinsic_atan}, - {"atan2", 2, true, intrinsic_atan2}, -@@ -4375,6 +4939,8 @@ intrinsic_functions[] = - {"dot", 2, true, intrinsic_dot}, - {"exp", 1, true, intrinsic_exp}, - {"exp2", 1, true, intrinsic_exp2}, -+ {"f16tof32", 1, true, intrinsic_f16tof32}, -+ {"faceforward", 3, true, intrinsic_faceforward}, - {"floor", 1, true, intrinsic_floor}, - {"fmod", 2, true, intrinsic_fmod}, - {"frac", 1, true, intrinsic_frac}, -@@ -4392,6 +4958,7 @@ intrinsic_functions[] = - {"normalize", 1, true, intrinsic_normalize}, - {"pow", 2, true, intrinsic_pow}, - {"radians", 1, true, intrinsic_radians}, -+ {"rcp", 1, true, intrinsic_rcp}, - {"reflect", 2, true, intrinsic_reflect}, - {"refract", 3, true, intrinsic_refract}, - {"round", 1, true, intrinsic_round}, -@@ -4406,12 +4973,17 @@ intrinsic_functions[] = - {"tan", 1, true, intrinsic_tan}, - {"tanh", 1, true, intrinsic_tanh}, - {"tex1D", -1, false, intrinsic_tex1D}, -+ {"tex1Dgrad", 4, false, intrinsic_tex1Dgrad}, - {"tex2D", -1, false, intrinsic_tex2D}, -+ {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, 
-+ {"tex2Dgrad", 4, false, intrinsic_tex2Dgrad}, - {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, - {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, - {"tex3D", -1, false, intrinsic_tex3D}, -+ {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, - {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, - {"texCUBE", -1, false, intrinsic_texCUBE}, -+ {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, - {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, - {"transpose", 1, true, intrinsic_transpose}, - {"trunc", 1, true, intrinsic_trunc}, -@@ -4599,7 +5171,7 @@ static bool add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, - common_type = first->data_type; - } - -- assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - args[0] = cond; - args[1] = first; -@@ -5481,6 +6053,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_BREAK - %token KW_BUFFER - %token KW_CASE -+%token KW_CONSTANTBUFFER - %token KW_CBUFFER - %token KW_CENTROID - %token KW_COLUMN_MAJOR -@@ -5513,6 +6086,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_NAMESPACE - %token KW_NOINTERPOLATION - %token KW_NOPERSPECTIVE -+%token KW_NULL - %token KW_OUT - %token KW_PACKOFFSET - %token KW_PASS -@@ -5566,6 +6140,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %token KW_TEXTURECUBEARRAY - %token KW_TRUE - %token KW_TYPEDEF -+%token KW_UNSIGNED - %token KW_UNIFORM - %token KW_VECTOR - %token KW_VERTEXSHADER -@@ -5670,6 +6245,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - - %type if_body - -+%type array -+ - %type var_modifiers - - %type any_identifier -@@ -5678,6 +6255,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h - %type name_opt - - %type parameter -+%type parameter_decl - - %type param_list - %type parameters -@@ -5717,8 +6295,7 @@ 
hlsl_prog: - | hlsl_prog buffer_declaration buffer_body - | hlsl_prog declaration_statement - { -- if (!list_empty(&$2->instrs)) -- hlsl_fixme(ctx, &@2, "Uniform initializer."); -+ hlsl_block_add_block(&ctx->static_initializers, $2); - destroy_block($2); - } - | hlsl_prog preproc_directive -@@ -5742,19 +6319,31 @@ pass: - - annotations_list: - variables_def_typed ';' -+ { -+ struct hlsl_block *block; -+ -+ block = initialize_vars(ctx, $1); -+ destroy_block(block); -+ } - | annotations_list variables_def_typed ';' -+ { -+ struct hlsl_block *block; -+ -+ block = initialize_vars(ctx, $2); -+ destroy_block(block); -+ } - - annotations_opt: - %empty - { - $$ = NULL; - } -- | '<' scope_start '>' -+ | '<' annotations_scope_start '>' - { - hlsl_pop_scope(ctx); - $$ = NULL; - } -- | '<' scope_start annotations_list '>' -+ | '<' annotations_scope_start annotations_list '>' - { - struct hlsl_scope *scope = ctx->cur_scope; - -@@ -6083,7 +6672,7 @@ func_declaration: - - if (!$1.first) - { -- assert(decl->parameters.count == $1.parameters.count); -+ VKD3D_ASSERT(decl->parameters.count == $1.parameters.count); - - for (i = 0; i < $1.parameters.count; ++i) - { -@@ -6198,7 +6787,7 @@ func_prototype_no_attrs: - * brittle and ugly. 
- */ - -- assert($5.count == params->count); -+ VKD3D_ASSERT($5.count == params->count); - for (i = 0; i < params->count; ++i) - { - struct hlsl_ir_var *orig_param = params->vars[i]; -@@ -6282,6 +6871,13 @@ switch_scope_start: - ctx->cur_scope->_switch = true; - } - -+annotations_scope_start: -+ %empty -+ { -+ hlsl_push_scope(ctx); -+ ctx->cur_scope->annotations = true; -+ } -+ - var_identifier: - VAR_IDENTIFIER - | NEW_IDENTIFIER -@@ -6315,6 +6911,9 @@ semantic: - { - char *p; - -+ if (!($$.raw_name = hlsl_strdup(ctx, $2))) -+ YYABORT; -+ - for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) - ; - $$.name = $2; -@@ -6330,22 +6929,34 @@ register_reservation: - ':' KW_REGISTER '(' any_identifier ')' - { - memset(&$$, 0, sizeof($$)); -- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $4, 0, &$$)) -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ -+ vkd3d_free($4); -+ } -+ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ')' -+ { -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) -+ { - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $4); -+ } - - vkd3d_free($4); -+ vkd3d_free($6); - } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' - { - memset(&$$, 0, sizeof($$)); -- if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ if (parse_reservation_index(ctx, $6, 0, &$$)) - { - hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); - } - else if (parse_reservation_space($6, &$$.reg_space)) - { -- if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $4, 0, &$$)) - hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $4); - } -@@ -6358,12 +6969,45 @@ register_reservation: - vkd3d_free($4); - 
vkd3d_free($6); - } -+ | ':' KW_REGISTER '(' any_identifier '[' expr ']' ',' any_identifier ')' -+ { -+ memset(&$$, 0, sizeof($$)); -+ -+ if (!parse_reservation_space($9, &$$.reg_space)) -+ hlsl_error(ctx, &@9, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $9); -+ -+ if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) -+ { -+ hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $4); -+ } -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($9); -+ } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) -+ { -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ } -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($8); -+ } - | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' - { - hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); - - memset(&$$, 0, sizeof($$)); -- if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) -+ if (!parse_reservation_index(ctx, $6, 0, &$$)) - hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Invalid register reservation '%s'.", $6); - -@@ -6375,6 +7019,26 @@ register_reservation: - vkd3d_free($6); - vkd3d_free($8); - } -+ | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ',' any_identifier ')' -+ { -+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); -+ -+ memset(&$$, 0, sizeof($$)); -+ if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) -+ { -+ hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register reservation '%s'.", $6); -+ } -+ -+ if 
(!parse_reservation_space($11, &$$.reg_space)) -+ hlsl_error(ctx, &@11, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Invalid register space reservation '%s'.", $11); -+ -+ vkd3d_free($4); -+ vkd3d_free($6); -+ vkd3d_free($8); -+ vkd3d_free($11); -+ } - - packoffset_reservation: - ':' KW_PACKOFFSET '(' any_identifier ')' -@@ -6427,6 +7091,14 @@ param_list: - } - - parameter: -+ parameter_decl -+ | parameter_decl '=' complex_initializer -+ { -+ $$ = $1; -+ $$.initializer = $3; -+ } -+ -+parameter_decl: - var_modifiers type_no_void any_identifier arrays colon_attribute - { - uint32_t modifiers = $1; -@@ -6449,11 +7121,18 @@ parameter: - } - type = hlsl_new_array_type(ctx, type, $4.sizes[i]); - } -+ vkd3d_free($4.sizes); -+ - $$.type = type; - -+ if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) -+ hlsl_fixme(ctx, &@2, "Shader model 5.1+ resource array."); -+ - $$.name = $3; - $$.semantic = $5.semantic; - $$.reg_reservation = $5.reg_reservation; -+ -+ memset(&$$.initializer, 0, sizeof($$.initializer)); - } - - texture_type: -@@ -6662,12 +7341,6 @@ type_no_void: - { - validate_texture_format_type(ctx, $3, &@3); - -- if (hlsl_version_lt(ctx, 4, 1)) -- { -- hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -- "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); -- } -- - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); - } - | texture_ms_type '<' type ',' shift_expr '>' -@@ -6696,6 +7369,10 @@ type_no_void: - validate_uav_type(ctx, $1, $3, &@3); - $$ = hlsl_new_uav_type(ctx, $1, $3, true); - } -+ | KW_STRING -+ { -+ $$ = ctx->builtin_types.string; -+ } - | TYPE_IDENTIFIER - { - $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); -@@ -6713,6 +7390,26 @@ type_no_void: - } - vkd3d_free($1); - } -+ | KW_UNSIGNED TYPE_IDENTIFIER -+ { -+ struct hlsl_type *type = hlsl_get_type(ctx->cur_scope, $2, true, true); -+ -+ if (hlsl_is_numeric_type(type) && type->e.numeric.type == 
HLSL_TYPE_INT) -+ { -+ if (!(type = hlsl_type_clone(ctx, type, 0, 0))) -+ YYABORT; -+ vkd3d_free((void *)type->name); -+ type->name = NULL; -+ type->e.numeric.type = HLSL_TYPE_UINT; -+ } -+ else -+ { -+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "The 'unsigned' keyword can't be used with type %s.", $2); -+ } -+ -+ $$ = type; -+ } - | KW_STRUCT TYPE_IDENTIFIER - { - $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); -@@ -6724,6 +7421,10 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "RenderTargetView", true, true); - } -+ | KW_DEPTHSTENCILSTATE -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilState", true, true); -+ } - | KW_DEPTHSTENCILVIEW - { - $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); -@@ -6736,6 +7437,37 @@ type_no_void: - { - $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); - } -+ | KW_COMPUTESHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "ComputeShader", true, true); -+ } -+ | KW_DOMAINSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "DomainShader", true, true); -+ } -+ | KW_HULLSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "HullShader", true, true); -+ } -+ | KW_GEOMETRYSHADER -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); -+ } -+ | KW_CONSTANTBUFFER '<' type '>' -+ { -+ if ($3->class != HLSL_CLASS_STRUCT) -+ hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "ConstantBuffer<...> requires user-defined structure type."); -+ $$ = hlsl_new_cb_type(ctx, $3); -+ } -+ | KW_RASTERIZERSTATE -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); -+ } -+ | KW_BLENDSTATE -+ { -+ $$ = hlsl_get_type(ctx->cur_scope, "BlendState", true, true); -+ } - - type: - type_no_void -@@ -6840,10 +7572,10 @@ variables_def_typed: - { - struct parse_variable_def *head_def; - -- assert(!list_empty($1)); -+ VKD3D_ASSERT(!list_empty($1)); - head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); - -- assert(head_def->basic_type); 
-+ VKD3D_ASSERT(head_def->basic_type); - $3->basic_type = head_def->basic_type; - $3->modifiers = head_def->modifiers; - $3->modifiers_loc = head_def->modifiers_loc; -@@ -6855,7 +7587,7 @@ variables_def_typed: - } - - variable_decl: -- any_identifier arrays colon_attribute -+ any_identifier arrays colon_attribute annotations_opt - { - $$ = hlsl_alloc(ctx, sizeof(*$$)); - $$->loc = @1; -@@ -6863,6 +7595,7 @@ variable_decl: - $$->arrays = $2; - $$->semantic = $3.semantic; - $$->reg_reservation = $3.reg_reservation; -+ $$->annotations = $4; - } - - state_block_start: -@@ -6932,6 +7665,34 @@ state_block: - hlsl_src_from_node(&entry->args[i], $5.args[i]); - vkd3d_free($5.args); - -+ $$ = $1; -+ state_block_add_entry($$, entry); -+ } -+ | state_block any_identifier '(' func_arguments ')' ';' -+ { -+ struct hlsl_state_block_entry *entry; -+ unsigned int i; -+ -+ if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) -+ YYABORT; -+ -+ entry->is_function_call = true; -+ -+ entry->name = $2; -+ entry->lhs_has_index = false; -+ entry->lhs_index = 0; -+ -+ entry->instrs = $4.instrs; -+ -+ entry->args_count = $4.args_count; -+ if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) -+ YYABORT; -+ for (i = 0; i < entry->args_count; ++i) -+ hlsl_src_from_node(&entry->args[i], $4.args[i]); -+ vkd3d_free($4.args); -+ -+ hlsl_validate_state_block_entry(ctx, entry, &@4); -+ - $$ = $1; - state_block_add_entry($$, entry); - } -@@ -7020,52 +7781,43 @@ variable_def_typed: - $$->modifiers_loc = @1; - } - --arrays: -- %empty -+array: -+ '[' ']' - { -- $$.sizes = NULL; -- $$.count = 0; -+ $$ = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; - } -- | '[' expr ']' arrays -+ | '[' expr ']' - { -- uint32_t *new_array; -- unsigned int size; -- -- size = evaluate_static_expression_as_uint(ctx, $2, &@2); -- -- destroy_block($2); -- -- $$ = $4; -+ $$ = evaluate_static_expression_as_uint(ctx, $2, &@2); - -- if (!size) -+ if (!$$) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, 
- "Array size is not a positive integer constant."); -- vkd3d_free($$.sizes); - YYABORT; - } - -- if (size > 65536) -+ if ($$ > 65536) - { - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, -- "Array size %u is not between 1 and 65536.", size); -- vkd3d_free($$.sizes); -+ "Array size %u is not between 1 and 65536.", $$); - YYABORT; - } - -- if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) -- { -- vkd3d_free($$.sizes); -- YYABORT; -- } -- $$.sizes = new_array; -- $$.sizes[$$.count++] = size; -+ destroy_block($2); -+ } -+ -+arrays: -+ %empty -+ { -+ $$.sizes = NULL; -+ $$.count = 0; - } -- | '[' ']' arrays -+ | array arrays - { - uint32_t *new_array; - -- $$ = $3; -+ $$ = $2; - - if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) - { -@@ -7074,7 +7826,7 @@ arrays: - } - - $$.sizes = new_array; -- $$.sizes[$$.count++] = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; -+ $$.sizes[$$.count++] = $1; - } - - var_modifiers: -@@ -7156,6 +7908,8 @@ var_modifiers: - } - | var_identifier var_modifiers - { -+ $$ = $2; -+ - if (!strcmp($1, "precise")) - $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); - else if (!strcmp($1, "single")) -@@ -7587,6 +8341,35 @@ primary_expr: - YYABORT; - } - } -+ | STRING -+ { -+ struct hlsl_ir_node *c; -+ -+ if (!(c = hlsl_new_string_constant(ctx, $1, &@1))) -+ { -+ vkd3d_free($1); -+ YYABORT; -+ } -+ vkd3d_free($1); -+ -+ if (!($$ = make_block(ctx, c))) -+ { -+ hlsl_free_instr(c); -+ YYABORT; -+ } -+ } -+ | KW_NULL -+ { -+ struct hlsl_ir_node *c; -+ -+ if (!(c = hlsl_new_null_constant(ctx, &@1))) -+ YYABORT; -+ if (!($$ = make_block(ctx, c))) -+ { -+ hlsl_free_instr(c); -+ YYABORT; -+ } -+ } - | VAR_IDENTIFIER - { - struct hlsl_ir_load *load; -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -index bdb72a1fab9..a695eefabf6 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -+++ 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c -@@ -20,6 +20,7 @@ - - #include "hlsl.h" - #include -+#include - - /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */ - static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, -@@ -52,7 +53,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - - if (regset == HLSL_REGSET_NUMERIC) - { -- assert(size % 4 == 0); -+ VKD3D_ASSERT(size % 4 == 0); - size /= 4; - } - -@@ -75,7 +76,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str - - if (regset == HLSL_REGSET_NUMERIC) - { -- assert(*offset_component == 0); -+ VKD3D_ASSERT(*offset_component == 0); - *offset_component = field_offset % 4; - field_offset /= 4; - } -@@ -120,7 +121,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st - return NULL; - hlsl_block_add_instr(block, offset); - -- assert(deref->var); -+ VKD3D_ASSERT(deref->var); - type = deref->var->data_type; - - for (i = 0; i < deref->path_len; ++i) -@@ -153,8 +154,8 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der - struct hlsl_block block; - struct hlsl_type *type; - -- assert(deref->var); -- assert(!hlsl_deref_is_lowered(deref)); -+ VKD3D_ASSERT(deref->var); -+ VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); - - type = hlsl_deref_get_type(ctx, deref); - -@@ -218,6 +219,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, - uniform->is_uniform = 1; - uniform->is_param = temp->is_param; - uniform->buffer = temp->buffer; -+ if (temp->default_values) -+ { -+ /* Transfer default values from the temp to the uniform. 
*/ -+ VKD3D_ASSERT(!uniform->default_values); -+ VKD3D_ASSERT(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); -+ uniform->default_values = temp->default_values; -+ temp->default_values = NULL; -+ } - - if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) - return; -@@ -312,7 +321,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir - } - } - -- if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) -+ if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) - { - vkd3d_free(new_name); - return NULL; -@@ -390,7 +399,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - } - else - { -- assert(i == 0); -+ VKD3D_ASSERT(i == 0); - - if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) - return; -@@ -523,7 +532,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s - } - else - { -- assert(i == 0); -+ VKD3D_ASSERT(i == 0); - - if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) - return; -@@ -918,7 +927,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - if (return_instr) - { - /* If we're in a loop, we should have used "break" instead. */ -- assert(!in_loop); -+ VKD3D_ASSERT(!in_loop); - - /* Iterate in reverse, to avoid use-after-free when unlinking sources from - * the "uses" list. */ -@@ -940,7 +949,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun - struct hlsl_ir_load *load; - - /* If we're in a loop, we should have used "break" instead. 
*/ -- assert(!in_loop); -+ VKD3D_ASSERT(!in_loop); - - if (tail == &cf_instr->entry) - return has_early_return; -@@ -999,7 +1008,7 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h - struct hlsl_deref coords_deref; - struct hlsl_ir_var *coords; - -- assert(dim_count < 4); -+ VKD3D_ASSERT(dim_count < 4); - - if (!(coords = hlsl_new_synthetic_var(ctx, "coords", - hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) -@@ -1100,9 +1109,9 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_resource_load_params params = {0}; - struct hlsl_ir_node *resource_load; - -- assert(coords->data_type->class == HLSL_CLASS_VECTOR); -- assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -- assert(coords->data_type->dimx == dim_count); -+ VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ VKD3D_ASSERT(coords->data_type->dimx == dim_count); - - if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) - return false; -@@ -1132,7 +1141,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - struct hlsl_deref row_deref; - unsigned int i; - -- assert(!hlsl_type_is_row_major(mat->data_type)); -+ VKD3D_ASSERT(!hlsl_type_is_row_major(mat->data_type)); - - if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) - return false; -@@ -1369,7 +1378,7 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co - unsigned int component_count = hlsl_type_component_count(var->data_type); - struct copy_propagation_value *value; - -- assert(component < component_count); -+ VKD3D_ASSERT(component < component_count); - value = copy_propagation_get_value_at_time(&var_def->traces[component], time); - - if (!value) -@@ -1402,7 +1411,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h - var_def->var = var; - - 
res = rb_put(&state->var_defs, var, &var_def->entry); -- assert(!res); -+ VKD3D_ASSERT(!res); - - return var_def; - } -@@ -1411,7 +1420,7 @@ static void copy_propagation_trace_record_value(struct hlsl_ctx *ctx, - struct copy_propagation_component_trace *trace, struct hlsl_ir_node *node, - unsigned int component, unsigned int time) - { -- assert(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); -+ VKD3D_ASSERT(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); - - if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity, - trace->record_count + 1, sizeof(trace->records[0]))) -@@ -1440,7 +1449,7 @@ static void copy_propagation_invalidate_variable(struct hlsl_ctx *ctx, struct co - /* Don't add an invalidate record if it is already present. */ - if (trace->record_count && trace->records[trace->record_count - 1].timestamp == time) - { -- assert(!trace->records[trace->record_count - 1].node); -+ VKD3D_ASSERT(!trace->records[trace->record_count - 1].node); - continue; - } - -@@ -1623,27 +1632,36 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, - - switch (type->class) - { -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_SCALAR: - case HLSL_CLASS_VECTOR: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_SAMPLER: -+ case HLSL_CLASS_STRING: - case HLSL_CLASS_TEXTURE: - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_RENDER_TARGET_VIEW: -+ case HLSL_CLASS_DEPTH_STENCIL_VIEW: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case HLSL_CLASS_NULL: - break; - - case HLSL_CLASS_MATRIX: - case HLSL_CLASS_ARRAY: - case HLSL_CLASS_STRUCT: -+ case HLSL_CLASS_CONSTANT_BUFFER: - /* FIXME: Actually we shouldn't even get here, but we don't split - * matrices yet. 
*/ - return false; - -- case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_PASS: -- case HLSL_CLASS_RENDER_TARGET_VIEW: -- case HLSL_CLASS_STRING: - case HLSL_CLASS_TECHNIQUE: - case HLSL_CLASS_VOID: - vkd3d_unreachable(); -@@ -1685,11 +1703,11 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, - - if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) - return false; -- assert(count == 1); -+ VKD3D_ASSERT(count == 1); - - if (!(value = copy_propagation_get_value(state, deref->var, start, time))) - return false; -- assert(value->component == 0); -+ VKD3D_ASSERT(value->component == 0); - - /* Only HLSL_IR_LOAD can produce an object. */ - load = hlsl_ir_load(value->node); -@@ -1970,6 +1988,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc - return progress; - } - -+enum validation_result -+{ -+ DEREF_VALIDATION_OK, -+ DEREF_VALIDATION_OUT_OF_BOUNDS, -+ DEREF_VALIDATION_NOT_CONSTANT, -+}; -+ -+static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, -+ const struct hlsl_deref *deref) -+{ -+ struct hlsl_type *type = deref->var->data_type; -+ unsigned int i; -+ -+ for (i = 0; i < deref->path_len; ++i) -+ { -+ struct hlsl_ir_node *path_node = deref->path[i].node; -+ unsigned int idx = 0; -+ -+ VKD3D_ASSERT(path_node); -+ if (path_node->type != HLSL_IR_CONSTANT) -+ return DEREF_VALIDATION_NOT_CONSTANT; -+ -+ /* We should always have generated a cast to UINT. */ -+ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR -+ && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -+ -+ idx = hlsl_ir_constant(path_node)->value.u[0].u; -+ -+ switch (type->class) -+ { -+ case HLSL_CLASS_VECTOR: -+ if (idx >= type->dimx) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Vector index is out of bounds. 
%u/%u", idx, type->dimx); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_MATRIX: -+ if (idx >= hlsl_type_major_size(type)) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_ARRAY: -+ if (idx >= type->e.array.elements_count) -+ { -+ hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); -+ return DEREF_VALIDATION_OUT_OF_BOUNDS; -+ } -+ break; -+ -+ case HLSL_CLASS_STRUCT: -+ break; -+ -+ default: -+ vkd3d_unreachable(); -+ } -+ -+ type = hlsl_get_element_type_from_path_index(ctx, type, path_node); -+ } -+ -+ return DEREF_VALIDATION_OK; -+} -+ - static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, - const char *usage) - { -@@ -1979,7 +2067,7 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct - { - struct hlsl_ir_node *path_node = deref->path[i].node; - -- assert(path_node); -+ VKD3D_ASSERT(path_node); - if (path_node->type != HLSL_IR_CONSTANT) - hlsl_note(ctx, &path_node->loc, VKD3D_SHADER_LOG_ERROR, - "Expression for %s within \"%s\" cannot be resolved statically.", -@@ -1987,60 +2075,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct - } - } - --static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - void *context) - { -- unsigned int start, count; -- -- if (instr->type == HLSL_IR_RESOURCE_LOAD) -+ switch (instr->type) - { -- struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -- -- if (!load->resource.var->is_uniform) -+ case HLSL_IR_RESOURCE_LOAD: - { -- hlsl_error(ctx, &instr->loc, 
VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Loaded resource must have a single uniform source."); -+ struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); -+ -+ if (!load->resource.var->is_uniform) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Loaded resource must have a single uniform source."); -+ } -+ else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Loaded resource from \"%s\" must be determinable at compile time.", -+ load->resource.var->name); -+ note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); -+ } -+ -+ if (load->sampler.var) -+ { -+ if (!load->sampler.var->is_uniform) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Resource load sampler must have a single uniform source."); -+ } -+ else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) -+ { -+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -+ "Resource load sampler from \"%s\" must be determinable at compile time.", -+ load->sampler.var->name); -+ note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); -+ } -+ } -+ break; - } -- else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) -+ case HLSL_IR_RESOURCE_STORE: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Loaded resource from \"%s\" must be determinable at compile time.", -- load->resource.var->name); -- note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); -- } -+ struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - -- if (load->sampler.var) -- { -- if (!load->sampler.var->is_uniform) -+ if (!store->resource.var->is_uniform) - { - hlsl_error(ctx, &instr->loc, 
VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Resource load sampler must have a single uniform source."); -+ "Accessed resource must have a single uniform source."); - } -- else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) -+ else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) - { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Resource load sampler from \"%s\" must be determinable at compile time.", -- load->sampler.var->name); -- note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); -+ "Accessed resource from \"%s\" must be determinable at compile time.", -+ store->resource.var->name); -+ note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); - } -+ break; - } -- } -- else if (instr->type == HLSL_IR_RESOURCE_STORE) -- { -- struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); -- -- if (!store->resource.var->is_uniform) -+ case HLSL_IR_LOAD: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Accessed resource must have a single uniform source."); -+ struct hlsl_ir_load *load = hlsl_ir_load(instr); -+ validate_component_index_range_from_deref(ctx, &load->src); -+ break; - } -- else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) -+ case HLSL_IR_STORE: - { -- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, -- "Accessed resource from \"%s\" must be determinable at compile time.", -- store->resource.var->name); -- note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); -+ struct hlsl_ir_store *store = hlsl_ir_store(instr); -+ validate_component_index_range_from_deref(ctx, &store->lhs); -+ break; - } -+ default: -+ break; - } - - return false; -@@ -2436,7 +2541,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir - return 
false; - - deref = &hlsl_ir_load(instr)->src; -- assert(deref->var); -+ VKD3D_ASSERT(deref->var); - - if (deref->path_len == 0) - return false; -@@ -2510,7 +2615,7 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc - return false; - - deref = &hlsl_ir_store(instr)->lhs; -- assert(deref->var); -+ VKD3D_ASSERT(deref->var); - - if (deref->path_len == 0) - return false; -@@ -2531,6 +2636,124 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc - return false; - } - -+/* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant -+ * index into multiple constant loads, where the value of only one of them ends up in the resulting -+ * node. -+ * This is achieved through a synthetic variable. The non-constant index is compared for equality -+ * with every possible value it can have within the array bounds, and the ternary operator is used -+ * to update the value of the synthetic var when the equality check passes. 
*/ -+static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, -+ struct hlsl_block *block) -+{ -+ struct hlsl_constant_value zero_value = {0}; -+ struct hlsl_ir_node *cut_index, *zero, *store; -+ unsigned int i, i_cut, element_count; -+ const struct hlsl_deref *deref; -+ struct hlsl_type *cut_type; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_var *var; -+ bool row_major; -+ -+ if (instr->type != HLSL_IR_LOAD) -+ return false; -+ load = hlsl_ir_load(instr); -+ deref = &load->src; -+ -+ if (deref->path_len == 0) -+ return false; -+ -+ for (i = deref->path_len - 1; ; --i) -+ { -+ if (deref->path[i].node->type != HLSL_IR_CONSTANT) -+ { -+ i_cut = i; -+ break; -+ } -+ -+ if (i == 0) -+ return false; -+ } -+ -+ cut_index = deref->path[i_cut].node; -+ cut_type = deref->var->data_type; -+ for (i = 0; i < i_cut; ++i) -+ cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node); -+ -+ row_major = hlsl_type_is_row_major(cut_type); -+ VKD3D_ASSERT(cut_type->class == HLSL_CLASS_ARRAY || row_major); -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, row_major ? "row_major-load" : "array-load", instr->data_type, &instr->loc))) -+ return false; -+ -+ if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, zero); -+ -+ if (!(store = hlsl_new_simple_store(ctx, var, zero))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ TRACE("Lowering non-constant %s load on variable '%s'.\n", row_major ? 
"row_major" : "array", deref->var->name); -+ -+ element_count = hlsl_type_element_count(cut_type); -+ for (i = 0; i < element_count; ++i) -+ { -+ struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); -+ struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; -+ struct hlsl_ir_node *const_i, *equals, *ternary, *var_store; -+ struct hlsl_ir_load *var_load, *specific_load; -+ struct hlsl_deref deref_copy = {0}; -+ -+ if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, const_i); -+ -+ operands[0] = cut_index; -+ operands[1] = const_i; -+ if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, equals); -+ -+ if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, equals); -+ -+ if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, &var_load->node); -+ -+ if (!hlsl_copy_deref(ctx, &deref_copy, deref)) -+ return false; -+ hlsl_src_remove(&deref_copy.path[i_cut]); -+ hlsl_src_from_node(&deref_copy.path[i_cut], const_i); -+ -+ if (!(specific_load = hlsl_new_load_index(ctx, &deref_copy, NULL, &cut_index->loc))) -+ { -+ hlsl_cleanup_deref(&deref_copy); -+ return false; -+ } -+ hlsl_block_add_instr(block, &specific_load->node); -+ -+ hlsl_cleanup_deref(&deref_copy); -+ -+ operands[0] = equals; -+ operands[1] = &specific_load->node; -+ operands[2] = &var_load->node; -+ if (!(ternary = hlsl_new_expr(ctx, HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc))) -+ return false; -+ hlsl_block_add_instr(block, ternary); -+ -+ if (!(var_store = hlsl_new_simple_store(ctx, var, ternary))) -+ return false; -+ hlsl_block_add_instr(block, var_store); -+ } -+ -+ if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, 
&load->node); -+ -+ return true; -+} - /* Lower combined samples and sampler variables to synthesized separated textures and samplers. - * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ - static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) -@@ -2554,11 +2777,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - case HLSL_RESOURCE_RESINFO: - case HLSL_RESOURCE_SAMPLE_CMP: - case HLSL_RESOURCE_SAMPLE_CMP_LZ: -- case HLSL_RESOURCE_SAMPLE_GRAD: - case HLSL_RESOURCE_SAMPLE_INFO: - return false; - - case HLSL_RESOURCE_SAMPLE: -+ case HLSL_RESOURCE_SAMPLE_GRAD: - case HLSL_RESOURCE_SAMPLE_LOD: - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_PROJ: -@@ -2573,7 +2796,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - return false; - } - -- assert(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); -+ VKD3D_ASSERT(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); - - if (!(name = hlsl_get_string_buffer(ctx))) - return false; -@@ -2590,7 +2813,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - struct hlsl_type *arr_type = load->resource.var->data_type; - for (i = 0; i < load->resource.path_len; ++i) - { -- assert(arr_type->class == HLSL_CLASS_ARRAY); -+ VKD3D_ASSERT(arr_type->class == HLSL_CLASS_ARRAY); - texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); - arr_type = arr_type->e.array.type; - } -@@ -2619,8 +2842,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in - - hlsl_copy_deref(ctx, &load->sampler, &load->resource); - load->resource.var = var; -- assert(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); -- assert(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); -+ 
VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); -+ VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); - - return true; - } -@@ -2918,6 +3141,108 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct - return true; - } - -+/* Lower SIN/COS to SINCOS for SM1. */ -+static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *arg, *half, *two_pi, *reciprocal_two_pi, *neg_pi; -+ struct hlsl_constant_value half_value, two_pi_value, reciprocal_two_pi_value, neg_pi_value; -+ struct hlsl_ir_node *mad, *frc, *reduced; -+ struct hlsl_type *type; -+ struct hlsl_ir_expr *expr; -+ enum hlsl_ir_expr_op op; -+ struct hlsl_ir_node *sincos; -+ int i; -+ -+ if (instr->type != HLSL_IR_EXPR) -+ return false; -+ expr = hlsl_ir_expr(instr); -+ -+ if (expr->op == HLSL_OP1_SIN) -+ op = HLSL_OP1_SIN_REDUCED; -+ else if (expr->op == HLSL_OP1_COS) -+ op = HLSL_OP1_COS_REDUCED; -+ else -+ return false; -+ -+ arg = expr->operands[0].node; -+ type = arg->data_type; -+ -+ /* Reduce the range of the input angles to [-pi, pi]. 
*/ -+ for (i = 0; i < type->dimx; ++i) -+ { -+ half_value.u[i].f = 0.5; -+ two_pi_value.u[i].f = 2.0 * M_PI; -+ reciprocal_two_pi_value.u[i].f = 1.0 / (2.0 * M_PI); -+ neg_pi_value.u[i].f = -M_PI; -+ } -+ -+ if (!(half = hlsl_new_constant(ctx, type, &half_value, &instr->loc)) -+ || !(two_pi = hlsl_new_constant(ctx, type, &two_pi_value, &instr->loc)) -+ || !(reciprocal_two_pi = hlsl_new_constant(ctx, type, &reciprocal_two_pi_value, &instr->loc)) -+ || !(neg_pi = hlsl_new_constant(ctx, type, &neg_pi_value, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, half); -+ hlsl_block_add_instr(block, two_pi); -+ hlsl_block_add_instr(block, reciprocal_two_pi); -+ hlsl_block_add_instr(block, neg_pi); -+ -+ if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half))) -+ return false; -+ hlsl_block_add_instr(block, mad); -+ if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mad, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, frc); -+ if (!(reduced = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, frc, two_pi, neg_pi))) -+ return false; -+ hlsl_block_add_instr(block, reduced); -+ -+ if (type->dimx == 1) -+ { -+ if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, sincos); -+ } -+ else -+ { -+ struct hlsl_ir_node *comps[4] = {0}; -+ struct hlsl_ir_var *var; -+ struct hlsl_deref var_deref; -+ struct hlsl_ir_load *var_load; -+ -+ for (i = 0; i < type->dimx; ++i) -+ { -+ uint32_t s = hlsl_swizzle_from_writemask(1 << i); -+ -+ if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, reduced, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, comps[i]); -+ } -+ -+ if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc))) -+ return false; -+ hlsl_init_simple_deref_from_var(&var_deref, var); -+ -+ for (i = 0; i < type->dimx; ++i) -+ { -+ struct hlsl_block store_block; -+ -+ if (!(sincos = hlsl_new_unary_expr(ctx, op, comps[i], &instr->loc))) -+ return false; -+ 
hlsl_block_add_instr(block, sincos); -+ -+ if (!hlsl_new_store_component(ctx, &store_block, &var_deref, i, sincos)) -+ return false; -+ hlsl_block_add_block(block, &store_block); -+ } -+ -+ if (!(var_load = hlsl_new_load_index(ctx, &var_deref, NULL, &instr->loc))) -+ return false; -+ hlsl_block_add_instr(block, &var_load->node); -+ } -+ -+ return true; -+} -+ - static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) - { - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; -@@ -2936,7 +3261,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st - float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); - - /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ -- assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) - return false; -@@ -2992,7 +3317,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - return false; - } - -- assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); - - type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, - instr->data_type->dimx, instr->data_type->dimy); -@@ -3290,7 +3615,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr - return false; - - /* Narrowing casts should have already been lowered. 
*/ -- assert(type->dimx == arg_type->dimx); -+ VKD3D_ASSERT(type->dimx == arg_type->dimx); - - zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); - if (!zero) -@@ -3312,7 +3637,7 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc - struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; - struct hlsl_ir_node *cond; - -- assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); -+ VKD3D_ASSERT(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); - - if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) - { -@@ -3511,7 +3836,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru - { - arg1 = expr->operands[0].node; - arg2 = expr->operands[1].node; -- assert(arg1->data_type->dimx == arg2->data_type->dimx); -+ VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx); - dimx = arg1->data_type->dimx; - is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; - -@@ -3729,6 +4054,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) - case HLSL_IR_INDEX: - case HLSL_IR_LOAD: - case HLSL_IR_RESOURCE_LOAD: -+ case HLSL_IR_STRING_CONSTANT: - case HLSL_IR_SWIZZLE: - if (list_empty(&instr->uses)) - { -@@ -3786,8 +4112,8 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, - if (!deref->rel_offset.node) - return false; - -- assert(deref->var); -- assert(deref->rel_offset.node->type != HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(deref->var); -+ VKD3D_ASSERT(deref->rel_offset.node->type != HLSL_IR_CONSTANT); - deref->var->indexable = true; - - return true; -@@ -3815,15 +4141,16 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -+ const struct hlsl_reg_reservation *reservation = &var->reg_reservation; - unsigned int r; - -- if (var->reg_reservation.reg_type) -+ if (reservation->reg_type) - { - for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) 
- { - if (var->regs[r].allocation_size > 0) - { -- if (var->reg_reservation.reg_type != get_regset_name(r)) -+ if (reservation->reg_type != get_regset_name(r)) - { - struct vkd3d_string_buffer *type_string; - -@@ -3839,10 +4166,8 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) - else - { - var->regs[r].allocated = true; -- var->regs[r].id = var->reg_reservation.reg_index; -- TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index, var->reg_reservation.reg_type, -- var->reg_reservation.reg_index + var->regs[r].allocation_size); -+ var->regs[r].space = reservation->reg_space; -+ var->regs[r].index = reservation->reg_index; - } - } - } -@@ -4010,6 +4335,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop - break; - } - case HLSL_IR_CONSTANT: -+ case HLSL_IR_STRING_CONSTANT: - break; - } - } -@@ -4111,7 +4437,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - unsigned int writemask; - uint32_t reg_idx; - -- assert(component_count <= reg_size); -+ VKD3D_ASSERT(component_count <= reg_size); - - for (reg_idx = 0;; ++reg_idx) - { -@@ -4133,6 +4459,30 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a - return ret; - } - -+/* Allocate a register with writemask, while reserving reg_writemask. 
*/ -+static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, -+ unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) -+{ -+ struct hlsl_reg ret = {0}; -+ uint32_t reg_idx; -+ -+ VKD3D_ASSERT((reg_writemask & writemask) == writemask); -+ -+ for (reg_idx = 0;; ++reg_idx) -+ { -+ if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) -+ break; -+ } -+ -+ record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); -+ -+ ret.id = reg_idx; -+ ret.allocation_size = 1; -+ ret.writemask = writemask; -+ ret.allocated = true; -+ return ret; -+} -+ - static bool is_range_available(const struct register_allocator *allocator, - unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) - { -@@ -4181,8 +4531,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, - { - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; - -+ /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ -+ - if (type->class <= HLSL_CLASS_VECTOR) -- return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); -+ return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); - else - return allocate_range(ctx, allocator, first_write, last_read, reg_size); - } -@@ -4224,7 +4576,7 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls - { - enum hlsl_sampler_dim dim; - -- assert(!load->sampler.var); -+ VKD3D_ASSERT(!load->sampler.var); - - dim = var->objects_usage[regset][index].sampler_dim; - if (dim != load->sampling_dim) -@@ -4334,6 +4686,44 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) - } - } - -+static void allocate_instr_temp_register(struct hlsl_ctx *ctx, -+ struct hlsl_ir_node *instr, struct register_allocator *allocator) -+{ -+ unsigned int reg_writemask = 0, dst_writemask = 0; -+ -+ if (instr->reg.allocated || !instr->last_read) -+ return; -+ -+ if (instr->type == HLSL_IR_EXPR) -+ { -+ switch (hlsl_ir_expr(instr)->op) -+ { -+ case HLSL_OP1_COS_REDUCED: -+ dst_writemask = VKD3DSP_WRITEMASK_0; -+ reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_0; -+ break; -+ -+ case HLSL_OP1_SIN_REDUCED: -+ dst_writemask = VKD3DSP_WRITEMASK_1; -+ reg_writemask = ctx->profile->major_version < 3 ? 
(1 << 3) - 1 : VKD3DSP_WRITEMASK_1; -+ break; -+ -+ default: -+ break; -+ } -+ } -+ -+ if (reg_writemask) -+ instr->reg = allocate_register_with_masks(ctx, allocator, -+ instr->index, instr->last_read, reg_writemask, dst_writemask); -+ else -+ instr->reg = allocate_numeric_registers_for_type(ctx, allocator, -+ instr->index, instr->last_read, instr->data_type); -+ -+ TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, -+ debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); -+} -+ - static void allocate_variable_temp_register(struct hlsl_ctx *ctx, - struct hlsl_ir_var *var, struct register_allocator *allocator) - { -@@ -4373,13 +4763,7 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, - if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) - continue; - -- if (!instr->reg.allocated && instr->last_read) -- { -- instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, -- instr->data_type); -- TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, -- debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); -- } -+ allocate_instr_temp_register(ctx, instr, allocator); - - switch (instr->type) - { -@@ -4474,9 +4858,9 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, - constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); - TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - -- assert(hlsl_is_numeric_type(type)); -- assert(type->dimy == 1); -- assert(constant->reg.writemask); -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); -+ VKD3D_ASSERT(type->dimy == 1); -+ VKD3D_ASSERT(constant->reg.writemask); - - for (x = 0, i = 0; x < 4; ++x) - { -@@ -4587,8 +4971,46 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) - list_move_tail(&ctx->extern_vars, &sorted); - } - -+/* 
In SM2, 'sincos' expects specific constants as src1 and src2 arguments. -+ * These have to be referenced directly, i.e. as 'c' not 'r'. */ -+static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct register_allocator *allocator) -+{ -+ const struct hlsl_ir_node *instr; -+ struct hlsl_type *type; -+ -+ if (ctx->profile->major_version >= 3) -+ return; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED -+ || hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED)) -+ { -+ type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); -+ -+ ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); -+ TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); -+ record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); -+ -+ ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); -+ TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); -+ record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); -+ -+ return; -+ } -+ } -+} -+ - static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) - { -+ struct register_allocator allocator_used = {0}; - struct register_allocator allocator = {0}; - struct hlsl_ir_var *var; - -@@ -4597,6 +5019,7 @@ static void 
allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC]; - - if (!var->is_uniform || reg_size == 0) - continue; -@@ -4606,15 +5029,18 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - unsigned int reg_idx = var->reg_reservation.reg_index; - unsigned int i; - -- assert(reg_size % 4 == 0); -+ VKD3D_ASSERT(reg_size % 4 == 0); - for (i = 0; i < reg_size / 4; ++i) - { -- if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ if (i < bind_count) - { -- hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -- "Overlapping register() reservations on 'c%u'.", reg_idx + i); -+ if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Overlapping register() reservations on 'c%u'.", reg_idx + i); -+ } -+ record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); - } -- - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); - } - -@@ -4627,6 +5053,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - } - } - -+ vkd3d_free(allocator_used.allocations); -+ - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { - unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; -@@ -4644,6 +5072,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi - - allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); - -+ allocate_sincos_const_registers(ctx, &entry_func->body, &allocator); -+ - vkd3d_free(allocator.allocations); - } - -@@ -4693,11 +5123,11 @@ static void 
allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - uint32_t reg; - bool builtin; - -- assert(var->semantic.name); -+ VKD3D_ASSERT(var->semantic.name); - - if (ctx->profile->major_version < 4) - { -- D3DSHADER_PARAM_REGISTER_TYPE sm1_type; -+ struct vkd3d_shader_version version; - D3DDECLUSAGE usage; - uint32_t usage_idx; - -@@ -4705,8 +5135,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) - return; - -- builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, ®); -- if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ builtin = hlsl_sm1_register_from_semantic(&version, -+ var->semantic.name, var->semantic.index, output, &type, ®); -+ if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Invalid semantic '%s'.", var->semantic.name); -@@ -4715,7 +5149,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var - - if ((!output && !var->last_read) || (output && !var->first_write)) - return; -- type = (enum vkd3d_shader_register_type)sm1_type; - } - else - { -@@ -4762,13 +5195,14 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) - } - } - --static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) -+static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) - { - const struct hlsl_buffer *buffer; - - LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) - { -- if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == 
index) -+ if (buffer->reservation.reg_type == 'b' -+ && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) - return buffer; - } - return NULL; -@@ -4783,6 +5217,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va - if (register_reservation) - { - var->buffer_offset = 4 * var->reg_reservation.reg_index; -+ var->has_explicit_bind_point = 1; - } - else - { -@@ -4815,6 +5250,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va - } - } - var->buffer_offset = var->reg_reservation.offset_index; -+ var->has_explicit_bind_point = 1; - } - else - { -@@ -4913,11 +5349,19 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) - } - } - -+static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx) -+{ -+ if (hlsl_version_ge(ctx, 5, 1)) -+ return UINT_MAX; -+ -+ return 13; -+} -+ - static void allocate_buffers(struct hlsl_ctx *ctx) - { - struct hlsl_buffer *buffer; -+ uint32_t index = 0, id = 0; - struct hlsl_ir_var *var; -- uint32_t index = 0; - - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) - { -@@ -4938,32 +5382,59 @@ static void allocate_buffers(struct hlsl_ctx *ctx) - - if (buffer->type == HLSL_BUFFER_CONSTANT) - { -- if (buffer->reservation.reg_type == 'b') -+ const struct hlsl_reg_reservation *reservation = &buffer->reservation; -+ -+ if (reservation->reg_type == 'b') - { -- const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); -+ const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, -+ reservation->reg_space, reservation->reg_index); -+ unsigned int max_index = get_max_cbuffer_reg_index(ctx); -+ -+ if (buffer->reservation.reg_index > max_index) -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Buffer reservation cb%u exceeds target's maximum (cb%u).", -+ buffer->reservation.reg_index, max_index); - - if (reserved_buffer && 
reserved_buffer != buffer) - { - hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); -+ "Multiple buffers bound to space %u, index %u.", -+ reservation->reg_space, reservation->reg_index); - hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, -- "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); -+ "Buffer %s is already bound to space %u, index %u.", -+ reserved_buffer->name, reservation->reg_space, reservation->reg_index); - } - -- buffer->reg.id = buffer->reservation.reg_index; -+ buffer->reg.space = reservation->reg_space; -+ buffer->reg.index = reservation->reg_index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ buffer->reg.id = id++; -+ else -+ buffer->reg.id = buffer->reg.index; - buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; -- TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); -+ TRACE("Allocated reserved %s to space %u, index %u, id %u.\n", -+ buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id); - } -- else if (!buffer->reservation.reg_type) -+ else if (!reservation->reg_type) - { -- while (get_reserved_buffer(ctx, index)) -+ unsigned int max_index = get_max_cbuffer_reg_index(ctx); -+ while (get_reserved_buffer(ctx, 0, index)) - ++index; - -- buffer->reg.id = index; -+ if (index > max_index) -+ hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, -+ "Too many buffers allocated, target's maximum is %u.", max_index); -+ -+ buffer->reg.space = 0; -+ buffer->reg.index = index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ buffer->reg.id = id++; -+ else -+ buffer->reg.id = buffer->reg.index; - buffer->reg.allocation_size = 1; - buffer->reg.allocated = true; -- TRACE("Allocated %s to cb%u.\n", buffer->name, index); -+ TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id); - ++index; - } - else -@@ -4980,7 +5451,7 
@@ static void allocate_buffers(struct hlsl_ctx *ctx) - } - - static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, -- uint32_t index, bool allocated_only) -+ uint32_t space, uint32_t index, bool allocated_only) - { - const struct hlsl_ir_var *var; - unsigned int start, count; -@@ -4995,12 +5466,18 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - start = var->reg_reservation.reg_index; - count = var->data_type->reg_size[regset]; - -+ if (var->reg_reservation.reg_space != space) -+ continue; -+ - if (!var->regs[regset].allocated && allocated_only) - continue; - } - else if (var->regs[regset].allocated) - { -- start = var->regs[regset].id; -+ if (var->regs[regset].space != space) -+ continue; -+ -+ start = var->regs[regset].index; - count = var->regs[regset].allocation_size; - } - else -@@ -5017,8 +5494,8 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum - static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - { - char regset_name = get_regset_name(regset); -+ uint32_t min_index = 0, id = 0; - struct hlsl_ir_var *var; -- uint32_t min_index = 0; - - if (regset == HLSL_REGSET_UAVS) - { -@@ -5041,35 +5518,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - if (var->regs[regset].allocated) - { - const struct hlsl_ir_var *reserved_object, *last_reported = NULL; -- unsigned int index, i; -+ unsigned int i; - -- if (var->regs[regset].id < min_index) -+ if (var->regs[regset].index < min_index) - { -- assert(regset == HLSL_REGSET_UAVS); -+ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "UAV index (%u) must be higher than the maximum render target index (%u).", -- var->regs[regset].id, min_index - 1); -+ var->regs[regset].index, min_index - 1); - continue; - } - - for (i = 0; i < count; ++i) - { -- index = var->regs[regset].id + i; 
-+ unsigned int space = var->regs[regset].space; -+ unsigned int index = var->regs[regset].index + i; - - /* get_allocated_object() may return "var" itself, but we - * actually want that, otherwise we'll end up reporting the - * same conflict between the same two variables twice. */ -- reserved_object = get_allocated_object(ctx, regset, index, true); -+ reserved_object = get_allocated_object(ctx, regset, space, index, true); - if (reserved_object && reserved_object != var && reserved_object != last_reported) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, -- "Multiple variables bound to %c%u.", regset_name, index); -+ "Multiple variables bound to space %u, %c%u.", regset_name, space, index); - hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, -- "Variable '%s' is already bound to %c%u.", reserved_object->name, -- regset_name, index); -+ "Variable '%s' is already bound to space %u, %c%u.", -+ reserved_object->name, regset_name, space, index); - last_reported = reserved_object; - } - } -+ -+ if (hlsl_version_ge(ctx, 5, 1)) -+ var->regs[regset].id = id++; -+ else -+ var->regs[regset].id = var->regs[regset].index; -+ TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n", -+ var->name, var->regs[regset].space, regset_name, var->regs[regset].index, -+ regset_name, var->regs[regset].index + count, var->regs[regset].id); - } - else - { -@@ -5078,7 +5564,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - - while (available < count) - { -- if (get_allocated_object(ctx, regset, index, false)) -+ if (get_allocated_object(ctx, regset, 0, index, false)) - available = 0; - else - ++available; -@@ -5086,10 +5572,15 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) - } - index -= count; - -- var->regs[regset].id = index; -+ var->regs[regset].space = 0; -+ var->regs[regset].index = index; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ var->regs[regset].id = 
id++; -+ else -+ var->regs[regset].id = var->regs[regset].index; - var->regs[regset].allocated = true; -- TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, -- index + count); -+ TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name, -+ regset_name, index, regset_name, index + count, var->regs[regset].id); - ++index; - } - } -@@ -5109,12 +5600,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - struct hlsl_ir_node *path_node = deref->path[i].node; - unsigned int idx = 0; - -- assert(path_node); -+ VKD3D_ASSERT(path_node); - if (path_node->type != HLSL_IR_CONSTANT) - return false; - - /* We should always have generated a cast to UINT. */ -- assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; -@@ -5123,21 +5614,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - { - case HLSL_CLASS_VECTOR: - if (idx >= type->dimx) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Vector index is out of bounds. %u/%u", idx, type->dimx); - return false; -- } - *start += idx; - break; - - case HLSL_CLASS_MATRIX: - if (idx >= hlsl_type_major_size(type)) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); - return false; -- } - if (hlsl_type_is_row_major(type)) - *start += idx * type->dimx; - else -@@ -5146,11 +5629,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl - - case HLSL_CLASS_ARRAY: - if (idx >= type->e.array.elements_count) -- { -- hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -- "Array index is out of bounds. 
%u/%u", idx, type->e.array.elements_count); - return false; -- } - *start += idx * hlsl_type_component_count(type->e.array.type); - break; - -@@ -5186,11 +5665,11 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - struct hlsl_ir_node *path_node = deref->path[i].node; - unsigned int idx = 0; - -- assert(path_node); -+ VKD3D_ASSERT(path_node); - if (path_node->type == HLSL_IR_CONSTANT) - { - /* We should always have generated a cast to UINT. */ -- assert(path_node->data_type->class == HLSL_CLASS_SCALAR -+ VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR - && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - - idx = hlsl_ir_constant(path_node)->value.u[0].u; -@@ -5240,8 +5719,8 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref - type = hlsl_get_element_type_from_path_index(ctx, type, path_node); - } - -- assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); -- assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); -+ VKD3D_ASSERT(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); -+ VKD3D_ASSERT(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); - return index_is_constant; - } - -@@ -5256,16 +5735,17 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref - if (offset_node) - { - /* We should always have generated a cast to UINT. */ -- assert(offset_node->data_type->class == HLSL_CLASS_SCALAR -+ VKD3D_ASSERT(offset_node->data_type->class == HLSL_CLASS_SCALAR - && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); -- assert(offset_node->type != HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(offset_node->type != HLSL_IR_CONSTANT); - return false; - } - - size = deref->var->data_type->reg_size[regset]; - if (*offset >= size) - { -- hlsl_error(ctx, &offset_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, -+ /* FIXME: Report a more specific location for the constant deref. 
*/ -+ hlsl_error(ctx, &deref->var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Dereference is out of bounds. %u/%u", *offset, size); - return false; - } -@@ -5280,8 +5760,9 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl - if (hlsl_offset_from_deref(ctx, deref, &offset)) - return offset; - -- hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", -- hlsl_node_type_to_string(deref->rel_offset.node->type)); -+ if (deref->rel_offset.node) -+ hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", -+ hlsl_node_type_to_string(deref->rel_offset.node->type)); - - return 0; - } -@@ -5292,9 +5773,10 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere - struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - -- assert(deref->data_type); -- assert(hlsl_is_numeric_type(deref->data_type)); -+ VKD3D_ASSERT(deref->data_type); -+ VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type)); - -+ ret.index += offset / 4; - ret.id += offset / 4; - - ret.writemask = 0xf & (0xf << (offset % 4)); -@@ -5446,6 +5928,414 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) - } while (progress); - } - -+static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, -+ struct vsir_program *program, bool output, struct hlsl_ir_var *var) -+{ -+ enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; -+ enum vkd3d_shader_register_type type; -+ struct shader_signature *signature; -+ struct signature_element *element; -+ unsigned int register_index, mask; -+ -+ if ((!output && !var->last_read) || (output && !var->first_write)) -+ return; -+ -+ if (output) -+ signature = &program->output_signature; -+ else -+ signature = &program->input_signature; -+ -+ if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, -+ 
signature->element_count + 1, sizeof(*signature->elements))) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element = &signature->elements[signature->element_count++]; -+ -+ if (!hlsl_sm1_register_from_semantic(&program->shader_version, -+ var->semantic.name, var->semantic.index, output, &type, ®ister_index)) -+ { -+ unsigned int usage, usage_idx; -+ bool ret; -+ -+ register_index = var->regs[HLSL_REGSET_NUMERIC].id; -+ -+ ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); -+ VKD3D_ASSERT(ret); -+ /* With the exception of vertex POSITION output, none of these are -+ * system values. Pixel POSITION input is not equivalent to -+ * SV_Position; the closer equivalent is VPOS, which is not declared -+ * as a semantic. */ -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX -+ && output && usage == VKD3D_DECL_USAGE_POSITION) -+ sysval = VKD3D_SHADER_SV_POSITION; -+ } -+ mask = (1 << var->data_type->dimx) - 1; -+ -+ memset(element, 0, sizeof(*element)); -+ if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) -+ { -+ --signature->element_count; -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ element->semantic_index = var->semantic.index; -+ element->sysval_semantic = sysval; -+ element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; -+ element->register_index = register_index; -+ element->target_location = register_index; -+ element->register_count = 1; -+ element->mask = mask; -+ element->used_mask = mask; -+ if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) -+ element->interpolation_mode = VKD3DSIM_LINEAR; -+} -+ -+static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) -+{ -+ struct hlsl_ir_var *var; -+ -+ LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) -+ { -+ if (var->is_input_semantic) -+ sm1_generate_vsir_signature_entry(ctx, program, false, var); -+ if 
(var->is_output_semantic) -+ sm1_generate_vsir_signature_entry(ctx, program, true, var); -+ } -+} -+ -+/* OBJECTIVE: Translate all the information from ctx and entry_func to the -+ * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() -+ * without relying on ctx and entry_func. */ -+static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, -+ uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) -+{ -+ struct vkd3d_shader_version version = {0}; -+ struct vkd3d_bytecode_buffer buffer = {0}; -+ -+ version.major = ctx->profile->major_version; -+ version.minor = ctx->profile->minor_version; -+ version.type = ctx->profile->type; -+ if (!vsir_program_init(program, NULL, &version, 0)) -+ { -+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; -+ return; -+ } -+ -+ write_sm1_uniforms(ctx, &buffer); -+ if (buffer.status) -+ { -+ vkd3d_free(buffer.data); -+ ctx->result = buffer.status; -+ return; -+ } -+ ctab->code = buffer.data; -+ ctab->size = buffer.size; -+ -+ sm1_generate_vsir_signature(ctx, program); -+} -+ -+static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, -+ struct hlsl_block **found_block) -+{ -+ struct hlsl_ir_node *node; -+ -+ LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ if (node == stop_point) -+ return NULL; -+ -+ if (node->type == HLSL_IR_IF) -+ { -+ struct hlsl_ir_if *iff = hlsl_ir_if(node); -+ struct hlsl_ir_jump *jump = NULL; -+ -+ if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) -+ return jump; -+ if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) -+ return jump; -+ } -+ else if (node->type == HLSL_IR_JUMP) -+ { -+ struct hlsl_ir_jump *jump = hlsl_ir_jump(node); -+ -+ if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) -+ { -+ *found_block = block; -+ return jump; -+ } -+ } -+ } -+ -+ return 
NULL; -+} -+ -+static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) -+{ -+ /* Always use the explicit limit if it has been passed. */ -+ if (loop->unroll_limit) -+ return loop->unroll_limit; -+ -+ /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ return 1024; -+ -+ /* SM4 limits implicit unrolling to 254 iterations. */ -+ if (hlsl_version_ge(ctx, 4, 0)) -+ return 254; -+ -+ /* SM<3 implicitly unrolls up to 1024 iterations. */ -+ return 1024; -+} -+ -+static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) -+{ -+ unsigned int max_iterations, i; -+ -+ max_iterations = loop_unrolling_get_max_iterations(ctx, loop); -+ -+ for (i = 0; i < max_iterations; ++i) -+ { -+ struct hlsl_block tmp_dst, *jump_block; -+ struct hlsl_ir_jump *jump = NULL; -+ -+ if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) -+ return false; -+ list_move_before(&loop->node.entry, &tmp_dst.instrs); -+ hlsl_block_cleanup(&tmp_dst); -+ -+ hlsl_run_const_passes(ctx, block); -+ -+ if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) -+ { -+ enum hlsl_ir_jump_type type = jump->type; -+ -+ if (jump_block != loop_parent) -+ { -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, -+ "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); -+ return false; -+ } -+ -+ list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); -+ hlsl_block_cleanup(&tmp_dst); -+ -+ if (type == HLSL_IR_JUMP_BREAK) -+ break; -+ } -+ } -+ -+ /* Native will not emit an error if max_iterations has been reached with an -+ * explicit limit. 
It also will not insert a loop if there are iterations left -+ * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ -+ if (!loop->unroll_limit && i == max_iterations) -+ { -+ if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, -+ "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); -+ return false; -+ } -+ -+ list_remove(&loop->node.entry); -+ hlsl_free_instr(&loop->node); -+ -+ return true; -+} -+ -+/* -+ * loop_unrolling_find_unrollable_loop() is not the normal way to do things; -+ * normal passes simply iterate over the whole block and apply a transformation -+ * to every relevant instruction. However, loop unrolling can fail, and we want -+ * to leave the loop in its previous state in that case. That isn't a problem by -+ * itself, except that loop unrolling needs copy-prop in order to work properly, -+ * and copy-prop state at the time of the loop depends on the rest of the program -+ * up to that point. This means we need to clone the whole program, and at that -+ * point we have to search it again anyway to find the clone of the loop we were -+ * going to unroll. -+ * -+ * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop -+ * up until the loop instruction, clone just that loop, then use copyprop again -+ * with the saved state after unrolling. However, copyprop currently isn't built -+ * for that yet [notably, it still relies on indices]. Note also this still doesn't -+ * really let us use transform_ir() anyway [since we don't have a good way to say -+ * "copyprop from the beginning of the program up to the instruction we're -+ * currently processing" from the callback]; we'd have to use a dedicated -+ * recursive function instead. 
*/ -+static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, -+ struct hlsl_block **containing_block) -+{ -+ struct hlsl_ir_node *instr; -+ -+ LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) -+ { -+ switch (instr->type) -+ { -+ case HLSL_IR_LOOP: -+ { -+ struct hlsl_ir_loop *nested_loop; -+ struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); -+ -+ if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) -+ return nested_loop; -+ -+ if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) -+ { -+ *containing_block = block; -+ return loop; -+ } -+ -+ break; -+ } -+ case HLSL_IR_IF: -+ { -+ struct hlsl_ir_loop *loop; -+ struct hlsl_ir_if *iff = hlsl_ir_if(instr); -+ -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) -+ return loop; -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) -+ return loop; -+ -+ break; -+ } -+ case HLSL_IR_SWITCH: -+ { -+ struct hlsl_ir_switch *s = hlsl_ir_switch(instr); -+ struct hlsl_ir_switch_case *c; -+ struct hlsl_ir_loop *loop; -+ -+ LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) -+ { -+ if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) -+ return loop; -+ } -+ -+ break; -+ } -+ default: -+ break; -+ } -+ } -+ -+ return NULL; -+} -+ -+static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) -+{ -+ while (true) -+ { -+ struct hlsl_block clone, *containing_block; -+ struct hlsl_ir_loop *loop, *cloned_loop; -+ -+ if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) -+ return; -+ -+ if (!hlsl_clone_block(ctx, &clone, block)) -+ return; -+ -+ cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); -+ VKD3D_ASSERT(cloned_loop); -+ -+ if (!loop_unrolling_unroll_loop(ctx, &clone, 
containing_block, cloned_loop)) -+ { -+ hlsl_block_cleanup(&clone); -+ loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; -+ continue; -+ } -+ -+ hlsl_block_cleanup(block); -+ hlsl_block_init(block); -+ hlsl_block_add_block(block, &clone); -+ } -+} -+ -+static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) -+{ -+ struct hlsl_ir_node *call, *rhs, *store; -+ struct hlsl_ir_function_decl *func; -+ unsigned int component_count; -+ struct hlsl_ir_load *load; -+ struct hlsl_ir_expr *expr; -+ struct hlsl_ir_var *lhs; -+ char *body; -+ -+ static const char template[] = -+ "typedef uint%u uintX;\n" -+ "float%u soft_f16tof32(uintX x)\n" -+ "{\n" -+ " uintX mantissa = x & 0x3ff;\n" -+ " uintX high2 = mantissa >> 8;\n" -+ " uintX high2_check = high2 ? high2 : mantissa;\n" -+ " uintX high6 = high2_check >> 4;\n" -+ " uintX high6_check = high6 ? high6 : high2_check;\n" -+ "\n" -+ " uintX high8 = high6_check >> 2;\n" -+ " uintX high8_check = (high8 ? high8 : high6_check) >> 1;\n" -+ " uintX shift = high6 ? (high2 ? 12 : 4) : (high2 ? 8 : 0);\n" -+ " shift = high8 ? shift + 2 : shift;\n" -+ " shift = high8_check ? shift + 1 : shift;\n" -+ " shift = -shift + 10;\n" -+ " shift = mantissa ? shift : 11;\n" -+ " uintX subnormal_mantissa = ((mantissa << shift) << 23) & 0x7fe000;\n" -+ " uintX subnormal_exp = -(shift << 23) + 0x38800000;\n" -+ " uintX subnormal_val = subnormal_exp + subnormal_mantissa;\n" -+ " uintX subnormal_or_zero = mantissa ? subnormal_val : 0;\n" -+ "\n" -+ " uintX exponent = (((x >> 10) << 23) & 0xf800000) + 0x38000000;\n" -+ "\n" -+ " uintX low_3 = (x << 13) & 0x7fe000;\n" -+ " uintX normalized_val = exponent + low_3;\n" -+ " uintX inf_nan_val = low_3 + 0x7f800000;\n" -+ "\n" -+ " uintX exp_mask = 0x7c00;\n" -+ " uintX is_inf_nan = (x & exp_mask) == exp_mask;\n" -+ " uintX is_normalized = x & exp_mask;\n" -+ "\n" -+ " uintX check = is_inf_nan ? inf_nan_val : normalized_val;\n" -+ " uintX exp_mantissa = (is_normalized ? 
check : subnormal_or_zero) & 0x7fffe000;\n" -+ " uintX sign_bit = (x << 16) & 0x80000000;\n" -+ "\n" -+ " return asfloat(exp_mantissa + sign_bit);\n" -+ "}\n"; -+ -+ -+ if (node->type != HLSL_IR_EXPR) -+ return false; -+ -+ expr = hlsl_ir_expr(node); -+ -+ if (expr->op != HLSL_OP1_F16TOF32) -+ return false; -+ -+ rhs = expr->operands[0].node; -+ component_count = hlsl_type_component_count(rhs->data_type); -+ -+ if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) -+ return false; -+ -+ if (!(func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body))) -+ return false; -+ -+ lhs = func->parameters.vars[0]; -+ -+ if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) -+ return false; -+ hlsl_block_add_instr(block, store); -+ -+ if (!(call = hlsl_new_call(ctx, func, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, call); -+ -+ if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) -+ return false; -+ hlsl_block_add_instr(block, &load->node); -+ -+ return true; -+} -+ - int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, - enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) - { -@@ -5466,6 +6356,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - if (ctx->result) - return ctx->result; - -+ if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) -+ lower_ir(ctx, lower_f16tof32, body); -+ - lower_return(ctx, entry_func, body, false); - - while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); -@@ -5532,6 +6425,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); - } - -+ transform_unroll_loops(ctx, body); - hlsl_run_const_passes(ctx, body); - - remove_unreachable_code(ctx, body); -@@ -5541,7 +6435,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_casts_to_bool, body); 
- lower_ir(ctx, lower_int_dot, body); - -- hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); -+ hlsl_transform_ir(ctx, validate_dereferences, body, NULL); - hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); - if (profile->major_version >= 4) - hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); -@@ -5555,6 +6449,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - - if (profile->major_version < 4) - { -+ while (lower_ir(ctx, lower_nonconstant_array_loads, body)); -+ - lower_ir(ctx, lower_ternary, body); - - lower_ir(ctx, lower_nonfloat_exprs, body); -@@ -5569,6 +6465,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - lower_ir(ctx, lower_round, body); - lower_ir(ctx, lower_ceil, body); - lower_ir(ctx, lower_floor, body); -+ lower_ir(ctx, lower_trig, body); - lower_ir(ctx, lower_comparison_operators, body); - lower_ir(ctx, lower_logic_not, body); - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) -@@ -5628,7 +6525,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry - switch (target_type) - { - case VKD3D_SHADER_TARGET_D3D_BYTECODE: -- return hlsl_sm1_write(ctx, entry_func, out); -+ { -+ uint32_t config_flags = vkd3d_shader_init_config_flags(); -+ struct vkd3d_shader_code ctab = {0}; -+ struct vsir_program program; -+ int result; -+ -+ sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); -+ if (ctx->result) -+ { -+ vsir_program_cleanup(&program); -+ vkd3d_shader_free_shader_code(&ctab); -+ return ctx->result; -+ } -+ -+ result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); -+ vsir_program_cleanup(&program); -+ vkd3d_shader_free_shader_code(&ctab); -+ return result; -+ } - - case VKD3D_SHADER_TARGET_DXBC_TPF: - return hlsl_sm4_write(ctx, entry_func, out); -diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -index 16015fa8a81..db4913b7c62 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c -@@ -28,7 +28,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -119,7 +119,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -234,7 +234,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -260,7 +260,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -286,7 +286,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -313,7 +313,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - unsigned int k; 
- float i; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -339,7 +339,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -384,7 +384,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -418,7 +418,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -442,7 +442,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -487,7 +487,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -527,7 +527,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = 
dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -553,7 +553,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -598,8 +598,8 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -635,8 +635,8 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -662,8 +662,8 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -689,8 +689,8 @@ static bool fold_bit_xor(struct hlsl_ctx 
*ctx, struct hlsl_constant_value *dst, - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -715,9 +715,9 @@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -- assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); - - dst->u[0].f = 0.0f; - for (k = 0; k < src1->node.data_type->dimx; ++k) -@@ -743,11 +743,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -- assert(type == src3->node.data_type->e.numeric.type); -- assert(src1->node.data_type->dimx == src2->node.data_type->dimx); -- assert(src3->node.data_type->dimx == 1); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); -+ VKD3D_ASSERT(src3->node.data_type->dimx == 1); - - dst->u[0].f = src3->value.u[0].f; - for (k = 0; k < src1->node.data_type->dimx; ++k) -@@ -774,8 +774,8 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, 
cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -841,8 +841,8 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -877,8 +877,8 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -916,8 +916,8 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -955,8 +955,8 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -- 
assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -986,8 +986,8 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1024,8 +1024,8 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1063,8 +1063,8 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1105,8 +1105,8 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons - enum hlsl_base_type type = dst_type->e.numeric.type; - unsigned int k; - -- assert(type == src1->node.data_type->e.numeric.type); -- assert(type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == 
src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1139,8 +1139,8 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -- assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1175,9 +1175,9 @@ static bool fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, - { - unsigned int k; - -- assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); -- assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); -- assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); - - for (k = 0; k < dst_type->dimx; ++k) - dst->u[k] = src1->value.u[k].u ? 
src2->value.u[k] : src3->value.u[k]; -@@ -1190,8 +1190,8 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c - { - unsigned int k; - -- assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -- assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); -+ VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); -+ VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); - - for (k = 0; k < dst_type->dimx; ++k) - { -@@ -1239,7 +1239,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, - { - if (expr->operands[i].node->type != HLSL_IR_CONSTANT) - return false; -- assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); - } - } - arg1 = hlsl_ir_constant(expr->operands[0].node); -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index b3b745fc1b2..6dbe30b1553 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -19,9 +19,73 @@ - #include "vkd3d_shader_private.h" - #include "vkd3d_types.h" - --bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) -+static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, -+ unsigned int *ret_count, const struct vkd3d_shader_parameter1 **ret_parameters) -+{ -+ const struct vkd3d_shader_spirv_target_info *spirv_info; -+ struct vkd3d_shader_parameter1 *parameters; -+ -+ *ret_count = 0; -+ *ret_parameters = NULL; -+ -+ if (!(spirv_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO)) || !spirv_info->parameter_count) -+ return VKD3D_OK; -+ -+ if (!(parameters = vkd3d_calloc(spirv_info->parameter_count, sizeof(*parameters)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ for (unsigned int i = 0; i < spirv_info->parameter_count; ++i) -+ { -+ const struct 
vkd3d_shader_parameter *src = &spirv_info->parameters[i]; -+ struct vkd3d_shader_parameter1 *dst = &parameters[i]; -+ -+ dst->name = src->name; -+ dst->type = src->type; -+ dst->data_type = src->data_type; -+ -+ if (src->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ dst->u.immediate_constant = src->u.immediate_constant; -+ } -+ else if (src->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) -+ { -+ dst->u.specialization_constant = src->u.specialization_constant; -+ } -+ else -+ { -+ ERR("Invalid parameter type %#x.\n", src->type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ } -+ -+ *ret_count = spirv_info->parameter_count; -+ *ret_parameters = parameters; -+ -+ return VKD3D_OK; -+} -+ -+bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -+ const struct vkd3d_shader_version *version, unsigned int reserve) - { - memset(program, 0, sizeof(*program)); -+ -+ if (compile_info) -+ { -+ const struct vkd3d_shader_parameter_info *parameter_info; -+ -+ if ((parameter_info = vkd3d_find_struct(compile_info->next, PARAMETER_INFO))) -+ { -+ program->parameter_count = parameter_info->parameter_count; -+ program->parameters = parameter_info->parameters; -+ } -+ else -+ { -+ if (convert_parameter_info(compile_info, &program->parameter_count, &program->parameters) < 0) -+ return false; -+ program->free_parameters = true; -+ } -+ } -+ - program->shader_version = *version; - return shader_instruction_array_init(&program->instructions, reserve); - } -@@ -30,6 +94,8 @@ void vsir_program_cleanup(struct vsir_program *program) - { - size_t i; - -+ if (program->free_parameters) -+ vkd3d_free((void *)program->parameters); - for (i = 0; i < program->block_name_count; ++i) - vkd3d_free((void *)program->block_names[i]); - vkd3d_free(program->block_names); -@@ -39,6 +105,18 @@ void vsir_program_cleanup(struct vsir_program *program) - shader_signature_cleanup(&program->patch_constant_signature); - } - -+const 
struct vkd3d_shader_parameter1 *vsir_program_get_parameter( -+ const struct vsir_program *program, enum vkd3d_shader_parameter_name name) -+{ -+ for (unsigned int i = 0; i < program->parameter_count; ++i) -+ { -+ if (program->parameters[i].name == name) -+ return &program->parameters[i]; -+ } -+ -+ return NULL; -+} -+ - static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) - { - return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; -@@ -46,9 +124,9 @@ static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade - - static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) -- || handler_idx == VKD3DSIH_HS_DECLS; -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) -+ || opcode == VKD3DSIH_HS_DECLS; - } - - static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) -@@ -60,9 +138,9 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i - - static bool vsir_instruction_init_with_params(struct vsir_program *program, - struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) -+ enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) - { -- vsir_instruction_init(ins, location, handler_idx); -+ vsir_instruction_init(ins, location, opcode); - ins->dst_count = dst_count; - ins->src_count = src_count; - -@@ -287,7 +365,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - mul_ins = &instructions->elements[pos]; - add_ins = &instructions->elements[pos + 1]; - -- mul_ins->handler_idx = VKD3DSIH_MUL; -+ 
mul_ins->opcode = VKD3DSIH_MUL; - mul_ins->src_count = 2; - - if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) -@@ -311,6 +389,58 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro - return VKD3D_OK; - } - -+static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *program, -+ struct vkd3d_shader_instruction *sincos) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = sincos - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ unsigned int s; -+ -+ if (sincos->dst_count != 1) -+ return VKD3D_OK; -+ -+ if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &instructions->elements[pos + 1]; -+ -+ if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_SINCOS, 2, 1))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins->flags = sincos->flags; -+ -+ *ins->src = *sincos->src; -+ /* Set the source swizzle to replicate the first component. 
*/ -+ s = vsir_swizzle_get_component(sincos->src->swizzle, 0); -+ ins->src->swizzle = vkd3d_shader_create_swizzle(s, s, s, s); -+ -+ if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_1) -+ { -+ ins->dst[0] = *sincos->dst; -+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_1; -+ } -+ else -+ { -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); -+ } -+ -+ if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_0) -+ { -+ ins->dst[1] = *sincos->dst; -+ ins->dst[1].write_mask = VKD3DSP_WRITEMASK_0; -+ } -+ else -+ { -+ vsir_dst_param_init(&ins->dst[1], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); -+ } -+ -+ /* Make the original instruction no-op */ -+ vkd3d_shader_instruction_make_nop(sincos); -+ -+ return VKD3D_OK; -+} -+ - static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, - struct vkd3d_shader_message_context *message_context) - { -@@ -322,7 +452,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - { - struct vkd3d_shader_instruction *ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_IFC: - if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) -@@ -339,11 +469,18 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr - return ret; - break; - -+ case VKD3DSIH_DCL: - case VKD3DSIH_DCL_CONSTANT_BUFFER: -+ case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_TEMPS: - vkd3d_shader_instruction_make_nop(ins); - break; - -+ case VKD3DSIH_SINCOS: -+ if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) -+ return ret; -+ break; -+ - default: - break; - } -@@ -492,26 +629,26 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal - struct shader_phase_location *loc; - bool b; - -- if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -+ if (ins->opcode == VKD3DSIH_HS_FORK_PHASE || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) - { - 
b = flattener_is_in_fork_or_join_phase(normaliser); - /* Reset the phase info. */ - normaliser->phase_body_idx = ~0u; -- normaliser->phase = ins->handler_idx; -+ normaliser->phase = ins->opcode; - normaliser->instance_count = 1; - /* Leave the first occurrence and delete the rest. */ - if (b) - vkd3d_shader_instruction_make_nop(ins); - return; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT -- || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) -+ else if (ins->opcode == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT -+ || ins->opcode == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) - { - normaliser->instance_count = ins->declaration.count + !ins->declaration.count; - vkd3d_shader_instruction_make_nop(ins); - return; - } -- else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( -+ else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( - &ins->declaration.dst.reg)) - { - vkd3d_shader_instruction_make_nop(ins); -@@ -524,7 +661,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal - if (normaliser->phase_body_idx == ~0u) - normaliser->phase_body_idx = index; - -- if (ins->handler_idx == VKD3DSIH_RET) -+ if (ins->opcode == VKD3DSIH_RET) - { - normaliser->last_ret_location = ins->location; - vkd3d_shader_instruction_make_nop(ins); -@@ -666,6 +803,12 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne - dst->write_mask = VKD3DSP_WRITEMASK_0; - } - -+static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) -+{ -+ vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); -+ src->reg.idx[0].offset = idx; -+} -+ - static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) - { - vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); -@@ -678,12 +821,18 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 - 
src->reg.u.immconst_u32[0] = value; - } - -+static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) -+{ -+ vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); -+ src->reg.idx[0].offset = idx; -+} -+ - void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx) -+ enum vkd3d_shader_opcode opcode) - { - memset(ins, 0, sizeof(*ins)); - ins->location = *location; -- ins->handler_idx = handler_idx; -+ ins->opcode = opcode; - } - - static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, -@@ -770,7 +919,7 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param - if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) - { - /* The TPF reader validates idx_count. */ -- assert(reg->idx_count == 1); -+ VKD3D_ASSERT(reg->idx_count == 1); - reg->idx[1] = reg->idx[0]; - /* The control point id param is implicit here. Avoid later complications by inserting it. 
*/ - reg->idx[0].offset = 0; -@@ -865,12 +1014,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - { - ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser.phase = ins->handler_idx; -+ normaliser.phase = ins->opcode; - break; - default: - if (vsir_instruction_is_dcl(ins)) -@@ -888,7 +1037,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i - { - ins = &instructions->elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - input_control_point_count = ins->declaration.count; -@@ -992,16 +1141,16 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u - { - unsigned int i, j, r, c, component_idx, component_count; - -- assert(write_mask <= VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(write_mask <= VKD3DSP_WRITEMASK_ALL); - component_idx = vsir_write_mask_get_component_idx(write_mask); - component_count = vsir_write_mask_component_count(write_mask); - -- assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); -+ VKD3D_ASSERT(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); - - if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) - { - /* Validated in the TPF reader. */ -- assert(range_map[register_idx][component_idx] != UINT8_MAX); -+ VKD3D_ASSERT(range_map[register_idx][component_idx] != UINT8_MAX); - return; - } - if (range_map[register_idx][component_idx] == register_count) -@@ -1021,7 +1170,7 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u - /* A synthetic patch constant range which overlaps an existing range can start upstream of it - * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. 
- * The latter is validated in the TPF reader. */ -- assert(!range_map[r][c] || !is_dcl_indexrange); -+ VKD3D_ASSERT(!range_map[r][c] || !is_dcl_indexrange); - range_map[r][c] = UINT8_MAX; - } - } -@@ -1224,7 +1373,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - - TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, - e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); -- assert(!(e->mask & f->mask)); -+ VKD3D_ASSERT(!(e->mask & f->mask)); - - e->mask |= f->mask; - e->used_mask |= f->used_mask; -@@ -1258,7 +1407,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - continue; - - register_count = range_map_get_register_count(range_map, e->register_index, e->mask); -- assert(register_count != UINT8_MAX); -+ VKD3D_ASSERT(register_count != UINT8_MAX); - register_count += !register_count; - - if (register_count > 1) -@@ -1281,7 +1430,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map - static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, - unsigned int id_idx, unsigned int register_index) - { -- assert(id_idx < ARRAY_SIZE(reg->idx) - 1); -+ VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1); - - /* For a relative-addressed register index, move the id up a slot to separate it from the address, - * because rel_addr can be replaced with a constant offset in some cases. */ -@@ -1388,7 +1537,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - if (is_io_dcl) - { - /* Validated in the TPF reader. 
*/ -- assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); -+ VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); - - if (dcl_params[element_idx]) - { -@@ -1413,7 +1562,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par - else - { - /* The control point id param. */ -- assert(reg->idx[0].rel_addr); -+ VKD3D_ASSERT(reg->idx[0].rel_addr); - } - id_idx = 1; - } -@@ -1526,7 +1675,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - struct vkd3d_shader_register *reg; - unsigned int i; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_INPUT: - if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) -@@ -1560,7 +1709,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser->phase = ins->handler_idx; -+ normaliser->phase = ins->opcode; - memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); - memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); - memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); -@@ -1576,7 +1725,33 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi - } - } - --static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) -+static bool use_flat_interpolation(const struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ static const struct vkd3d_shader_location no_loc; -+ const struct vkd3d_shader_parameter1 *parameter; -+ -+ if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) -+ return false; -+ -+ if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ 
"Unsupported flat interpolation parameter type %#x.\n", parameter->type); -+ return false; -+ } -+ if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); -+ return false; -+ } -+ -+ return parameter->u.immediate_constant.u.u32; -+} -+ -+static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) - { - struct io_normaliser normaliser = {program->instructions}; - struct vkd3d_shader_instruction *ins; -@@ -1594,7 +1769,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - { - ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: - normaliser.output_control_point_count = ins->declaration.count; -@@ -1608,7 +1783,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - /* fall through */ - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- normaliser.phase = ins->handler_idx; -+ normaliser.phase = ins->opcode; - break; - default: - break; -@@ -1626,7 +1801,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; - else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) - normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; -- else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); -+ else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); - } - } - } -@@ -1639,6 +1814,18 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program - return VKD3D_ERROR_OUT_OF_MEMORY; - } - -+ if 
(program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL -+ && program->shader_version.major < 4 && use_flat_interpolation(program, message_context)) -+ { -+ for (i = 0; i < program->input_signature.element_count; ++i) -+ { -+ struct signature_element *element = &program->input_signature.elements[i]; -+ -+ if (!ascii_strcasecmp(element->semantic_name, "COLOR")) -+ element->interpolation_mode = VKD3DSIM_CONSTANT; -+ } -+ } -+ - normaliser.phase = VKD3DSIH_INVALID; - for (i = 0; i < normaliser.instructions.count; ++i) - shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); -@@ -1740,7 +1927,7 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) -+ if (ins->opcode == VKD3DSIH_DEF || ins->opcode == VKD3DSIH_DEFI || ins->opcode == VKD3DSIH_DEFB) - { - struct flat_constant_def *def; - -@@ -1779,7 +1966,7 @@ static void remove_dead_code(struct vsir_program *program) - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_IF: - case VKD3DSIH_LOOP: -@@ -1799,7 +1986,7 @@ static void remove_dead_code(struct vsir_program *program) - { - if (depth > 0) - { -- if (ins->handler_idx != VKD3DSIH_ELSE) -+ if (ins->opcode != VKD3DSIH_ELSE) - --depth; - vkd3d_shader_instruction_make_nop(ins); - } -@@ -1870,14 +2057,14 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - struct vkd3d_shader_src_param *srcs; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_TEX: - if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) - return VKD3D_ERROR_OUT_OF_MEMORY; - 
memset(srcs, 0, sizeof(*srcs) * 3); - -- ins->handler_idx = VKD3DSIH_SAMPLE; -+ ins->opcode = VKD3DSIH_SAMPLE; - - srcs[0] = ins->src[0]; - -@@ -1899,13 +2086,42 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - ins->src_count = 3; - break; - -+ case VKD3DSIH_TEXLDD: -+ if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ memset(srcs, 0, sizeof(*srcs) * 5); -+ -+ ins->opcode = VKD3DSIH_SAMPLE_GRAD; -+ -+ srcs[0] = ins->src[0]; -+ -+ srcs[1].reg.type = VKD3DSPR_RESOURCE; -+ srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; -+ srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; -+ srcs[1].reg.idx_count = 2; -+ srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; -+ srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; -+ srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ srcs[2].reg.type = VKD3DSPR_SAMPLER; -+ srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; -+ srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; -+ srcs[2].reg.idx_count = 2; -+ srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; -+ -+ srcs[3] = ins->src[2]; -+ srcs[4] = ins->src[3]; -+ -+ ins->src = srcs; -+ ins->src_count = 5; -+ break; -+ - case VKD3DSIH_TEXBEM: - case VKD3DSIH_TEXBEML: - case VKD3DSIH_TEXCOORD: - case VKD3DSIH_TEXDEPTH: - case VKD3DSIH_TEXDP3: - case VKD3DSIH_TEXDP3TEX: -- case VKD3DSIH_TEXLDD: - case VKD3DSIH_TEXLDL: - case VKD3DSIH_TEXM3x2PAD: - case VKD3DSIH_TEXM3x2TEX: -@@ -1919,7 +2135,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr - case VKD3DSIH_TEXREG2RGB: - vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, - "Aborting due to not yet implemented feature: " -- "Combined sampler instruction %#x.", ins->handler_idx); -+ "Combined sampler instruction %#x.", ins->opcode); - return VKD3D_ERROR_NOT_IMPLEMENTED; - - default: -@@ -2030,7 +2246,7 @@ static bool cf_flattener_copy_instruction(struct cf_flattener *flattener, - { - struct 
vkd3d_shader_instruction *dst_ins; - -- if (instruction->handler_idx == VKD3DSIH_NOP) -+ if (instruction->opcode == VKD3DSIH_NOP) - return true; - - if (!(dst_ins = cf_flattener_require_space(flattener, 1))) -@@ -2245,9 +2461,9 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - * phase instruction, and in all other shader types begins with the first label instruction. - * Declaring an indexable temp with function scope is not considered a declaration, - * because it needs to live inside a function. */ -- if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) -+ if (!after_declarations_section && instruction->opcode != VKD3DSIH_NOP) - { -- bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP -+ bool is_function_indexable = instruction->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP - && instruction->declaration.indexable_temp.has_function_scope; - - if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) -@@ -2260,14 +2476,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte - cf_info = flattener->control_flow_depth - ? 
&flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: - if (!cf_flattener_copy_instruction(flattener, instruction)) - return VKD3D_ERROR_OUT_OF_MEMORY; -- if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) -+ if (instruction->opcode != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) - after_declarations_section = false; - break; - -@@ -2601,7 +2817,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi - - static unsigned int label_from_src_param(const struct vkd3d_shader_src_param *param) - { -- assert(param->reg.type == VKD3DSPR_LABEL); -+ VKD3D_ASSERT(param->reg.type == VKD3DSPR_LABEL); - return param->reg.idx[0].offset; - } - -@@ -2662,7 +2878,7 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - unsigned int case_count, j, default_label; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); -@@ -2858,7 +3074,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - - /* Only phi src/dst SSA values need be converted here. Structurisation may - * introduce new cases of undominated SSA use, which will be handled later. 
*/ -- if (ins->handler_idx != VKD3DSIH_PHI) -+ if (ins->opcode != VKD3DSIH_PHI) - continue; - ++phi_count; - -@@ -2870,7 +3086,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - unsigned int label; - - label = label_from_src_param(&ins->src[j + 1]); -- assert(label); -+ VKD3D_ASSERT(label); - - info = &block_info[label - 1]; - -@@ -2907,7 +3123,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ - for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: - current_label = label_from_src_param(&ins->src[0]); -@@ -3027,7 +3243,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int - - byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); - -- assert(label); -+ VKD3D_ASSERT(label); - memset(block, 0, sizeof(*block)); - block->label = label; - vsir_block_list_init(&block->predecessors); -@@ -3311,7 +3527,7 @@ static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_blo - struct vsir_block *successor = &cfg->blocks[target - 1]; - enum vkd3d_result ret; - -- assert(successor->label != 0); -+ VKD3D_ASSERT(successor->label != 0); - - if ((ret = vsir_block_list_add(&block->successors, successor)) < 0) - return ret; -@@ -3336,7 +3552,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) - if (block->label == 0) - continue; - -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_RET: - shape = "trapezium"; -@@ -3478,7 +3694,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; - bool finish = false; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: -@@ -3488,11 +3704,11 @@ 
static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - { - unsigned int label = label_from_src_param(&instruction->src[0]); - -- assert(!current_block); -- assert(label > 0); -- assert(label <= cfg->block_count); -+ VKD3D_ASSERT(!current_block); -+ VKD3D_ASSERT(label > 0); -+ VKD3D_ASSERT(label <= cfg->block_count); - current_block = &cfg->blocks[label - 1]; -- assert(current_block->label == 0); -+ VKD3D_ASSERT(current_block->label == 0); - if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) - goto fail; - current_block->begin = &program->instructions.elements[i + 1]; -@@ -3503,7 +3719,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - - case VKD3DSIH_BRANCH: - case VKD3DSIH_RET: -- assert(current_block); -+ VKD3D_ASSERT(current_block); - current_block->end = instruction; - current_block = NULL; - break; -@@ -3511,7 +3727,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- assert(!current_block); -+ VKD3D_ASSERT(!current_block); - finish = true; - break; - -@@ -3533,7 +3749,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program - if (block->label == 0) - continue; - -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_RET: - break; -@@ -3581,7 +3797,7 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru - { - size_t i; - -- assert(current->label != 0); -+ VKD3D_ASSERT(current->label != 0); - - if (current == reference) - return; -@@ -3615,11 +3831,16 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) - { - struct vsir_block *block2 = &cfg->blocks[j]; - -- if (block2->label == 0) -+ if (block2->label == 0 || !vsir_block_dominates(block, block2)) - continue; - -- if (vsir_block_dominates(block, block2)) -- 
vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); -+ if (cfg->debug_buffer.content_size > 512) -+ { -+ TRACE("%s...\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates: ...", block->label); -+ } -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); - } - TRACE("%s\n", cfg->debug_buffer.buffer); - vkd3d_string_buffer_clear(&cfg->debug_buffer); -@@ -3711,7 +3932,16 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) - vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); - - for (k = 0; k < loop->count; ++k) -+ { -+ if (cfg->debug_buffer.content_size > 512) -+ { -+ TRACE("%s...\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop: ...", -+ block->label, header->label); -+ } - vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); -+ } - - TRACE("%s\n", cfg->debug_buffer.buffer); - vkd3d_string_buffer_clear(&cfg->debug_buffer); -@@ -3796,7 +4026,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - /* Do not count back edges. 
*/ - if (cfg->loops_by_header[i] != SIZE_MAX) - { -- assert(in_degrees[i] > 0); -+ VKD3D_ASSERT(in_degrees[i] > 0); - in_degrees[i] -= 1; - } - -@@ -3882,7 +4112,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - - inner_stack_item->seen_count += new_seen_count; - -- assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); -+ VKD3D_ASSERT(inner_stack_item->seen_count <= inner_stack_item->loop->count); - if (inner_stack_item->seen_count != inner_stack_item->loop->count) - break; - -@@ -3902,7 +4132,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - if (vsir_block_dominates(successor, block)) - continue; - -- assert(in_degrees[successor->label - 1] > 0); -+ VKD3D_ASSERT(in_degrees[successor->label - 1] > 0); - --in_degrees[successor->label - 1]; - - if (in_degrees[successor->label - 1] == 0) -@@ -3923,7 +4153,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - goto fail; - } - -- assert(sorter.stack_count == 0); -+ VKD3D_ASSERT(sorter.stack_count == 0); - - vkd3d_free(in_degrees); - vkd3d_free(sorter.stack); -@@ -3934,7 +4164,15 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) - vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); - - for (i = 0; i < cfg->order.count; ++i) -+ { -+ if (cfg->debug_buffer.content_size > 512) -+ { -+ TRACE("%s...\n", cfg->debug_buffer.buffer); -+ vkd3d_string_buffer_clear(&cfg->debug_buffer); -+ vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order: ..."); -+ } - vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); -+ } - - TRACE("%s\n", cfg->debug_buffer.buffer); - vkd3d_string_buffer_clear(&cfg->debug_buffer); -@@ -3988,12 +4226,12 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ - ACTION_EXTEND, - } action = ACTION_CREATE_NEW; - -- /* We've already contructed loop intervals for the back -+ /* We've already constructed loop intervals for the back - * 
edges, there's nothing more to do. */ - if (vsir_block_dominates(successor, block)) - continue; - -- assert(block->order_pos < successor->order_pos); -+ VKD3D_ASSERT(block->order_pos < successor->order_pos); - - /* Jumping from a block to the following one is always - * possible, so nothing to do. */ -@@ -4066,7 +4304,7 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ - { - if (interval->synthetic) - interval->begin = min(begin, interval->begin); -- assert(begin >= interval->begin); -+ VKD3D_ASSERT(begin >= interval->begin); - } - } - -@@ -4119,7 +4357,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block - break; - } - -- assert(action->target != UINT_MAX); -+ VKD3D_ASSERT(action->target != UINT_MAX); - action->jump_type = JUMP_CONTINUE; - } - else -@@ -4141,7 +4379,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block - - if (action->target == UINT_MAX) - { -- assert(successor->order_pos == block->order_pos + 1); -+ VKD3D_ASSERT(successor->order_pos == block->order_pos + 1); - action->jump_type = JUMP_NONE; - } - else -@@ -4168,7 +4406,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - struct vsir_block *block = cfg->order.blocks[i]; - struct vsir_cfg_structure *structure; - -- assert(stack_depth > 0); -+ VKD3D_ASSERT(stack_depth > 0); - - /* Open loop intervals. */ - while (open_interval_idx < cfg->loop_interval_count) -@@ -4192,7 +4430,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - structure->u.block = block; - - /* Generate between zero and two jump instructions. */ -- switch (block->end->handler_idx) -+ switch (block->end->opcode) - { - case VKD3DSIH_BRANCH: - { -@@ -4227,7 +4465,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - * selection ladders. 
*/ - if (action_true.successor == action_false.successor) - { -- assert(action_true.jump_type == action_false.jump_type); -+ VKD3D_ASSERT(action_true.jump_type == action_false.jump_type); - } - else - { -@@ -4243,10 +4481,10 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; - struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; - -- assert(inner_loop->type == STRUCTURE_TYPE_LOOP); -+ VKD3D_ASSERT(inner_loop->type == STRUCTURE_TYPE_LOOP); - - /* Otherwise, if one of the branches is -- * continueing the inner loop we're inside, -+ * continue-ing the inner loop we're inside, - * make sure it's the false branch (because it - * will be optimized out later). */ - if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) -@@ -4260,7 +4498,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - action_false = tmp; - } - -- assert(action_true.jump_type != JUMP_NONE); -+ VKD3D_ASSERT(action_true.jump_type != JUMP_NONE); - - if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) - goto fail; -@@ -4300,8 +4538,8 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) - } - } - -- assert(stack_depth == 0); -- assert(open_interval_idx == cfg->loop_interval_count); -+ VKD3D_ASSERT(stack_depth == 0); -+ VKD3D_ASSERT(open_interval_idx == cfg->loop_interval_count); - - if (TRACE_ON()) - vsir_cfg_dump_structured_program(cfg); -@@ -4325,7 +4563,7 @@ static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, - && !last->u.jump.condition && last->u.jump.target == target) - { - --list->count; -- assert(cfg->loop_intervals[target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); - --cfg->loop_intervals[target].target_count; - } - } -@@ -4366,7 +4604,7 @@ 
static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg - size_t pos = list->count - 1; - - selection = &list->structures[pos]; -- assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); - - if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); - else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); -@@ -4387,19 +4625,19 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg - /* Pointer `selection' could have been invalidated by the append - * operation. */ - selection = &list->structures[pos]; -- assert(selection->type == STRUCTURE_TYPE_SELECTION); -+ VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); - - if (if_target == max_target) - { - --selection->u.selection.if_body.count; -- assert(cfg->loop_intervals[if_target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[if_target].target_count > 0); - --cfg->loop_intervals[if_target].target_count; - } - - if (else_target == max_target) - { - --selection->u.selection.else_body.count; -- assert(cfg->loop_intervals[else_target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[else_target].target_count > 0); - --cfg->loop_intervals[else_target].target_count; - } - -@@ -4507,7 +4745,7 @@ static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, - } - - target = trailing_break->u.jump.target; -- assert(cfg->loop_intervals[target].target_count > 0); -+ VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); - - /* If the loop is not targeted by any jump, we can remove it. 
The - * trailing `break' then targets another loop, so we have to keep -@@ -4674,7 +4912,7 @@ static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_stru - break; - for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) - { -- assert(l->type == STRUCTURE_TYPE_LOOP); -+ VKD3D_ASSERT(l->type == STRUCTURE_TYPE_LOOP); - l->u.loop.needs_trampoline = true; - } - break; -@@ -4714,7 +4952,7 @@ static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_struct - case STRUCTURE_TYPE_JUMP: - if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) - break; -- assert(loop && loop->type == STRUCTURE_TYPE_LOOP); -+ VKD3D_ASSERT(loop && loop->type == STRUCTURE_TYPE_LOOP); - if (loop->u.loop.needs_trampoline) - structure->u.jump.needs_launcher = true; - break; -@@ -4888,14 +5126,14 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, - struct vsir_cfg_emit_target *target = cfg->target; - const struct vkd3d_shader_location no_loc = {0}; - /* Encode the jump target as the loop index plus a bit to remember whether -- * we're breaking or continueing. */ -+ * we're breaking or continue-ing. */ - unsigned int jump_target = jump->target << 1; - enum vkd3d_shader_opcode opcode; - - switch (jump->type) - { - case JUMP_CONTINUE: -- /* If we're continueing the loop we're directly inside, then we can emit a -+ /* If we're continue-ing the loop we're directly inside, then we can emit a - * `continue'. Otherwise we first have to break all the loops between here - * and the loop to continue, recording our intention to continue - * in the lowest bit of jump_target. 
*/ -@@ -4912,7 +5150,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, - break; - - case JUMP_RET: -- assert(!jump->condition); -+ VKD3D_ASSERT(!jump->condition); - opcode = VKD3DSIH_RET; - break; - -@@ -5049,22 +5287,22 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: -- assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); - TRACE("Structurizing a non-hull shader.\n"); - if ((ret = vsir_program_structurize_function(program, message_context, - &target, &i)) < 0) - goto fail; -- assert(i == program->instructions.count); -+ VKD3D_ASSERT(i == program->instructions.count); - break; - - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -- TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); -+ VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); - target.instructions[target.ins_count++] = *ins; - ++i; - if ((ret = vsir_program_structurize_function(program, message_context, -@@ -5222,22 +5460,22 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - { - struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - -- switch (ins->handler_idx) -+ switch (ins->opcode) - { - case VKD3DSIH_LABEL: -- assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); - TRACE("Materializing undominated SSAs in a non-hull shader.\n"); - if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( - program, message_context, 
&i)) < 0) - return ret; -- assert(i == program->instructions.count); -+ VKD3D_ASSERT(i == program->instructions.count); - break; - - case VKD3DSIH_HS_CONTROL_POINT_PHASE: - case VKD3DSIH_HS_FORK_PHASE: - case VKD3DSIH_HS_JOIN_PHASE: -- assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -- TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); -+ VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); -+ TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); - ++i; - if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( - program, message_context, &i)) < 0) -@@ -5253,6 +5491,192 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru - return VKD3D_OK; - } - -+static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) -+{ -+ for (unsigned int i = 0; i < signature->element_count; ++i) -+ { -+ if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET -+ && !signature->elements[i].register_index) -+ { -+ *index = i; -+ return true; -+ } -+ } -+ -+ return false; -+} -+ -+static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, -+ const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, -+ const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos) -+{ -+ struct vkd3d_shader_instruction_array *instructions = &program->instructions; -+ size_t pos = ret - instructions->elements; -+ struct vkd3d_shader_instruction *ins; -+ -+ static const struct -+ { -+ enum vkd3d_shader_opcode float_opcode; -+ enum vkd3d_shader_opcode uint_opcode; -+ bool swap; -+ } -+ opcodes[] = -+ { -+ [VKD3D_SHADER_COMPARISON_FUNC_EQUAL] = {VKD3DSIH_EQO, VKD3DSIH_IEQ}, -+ [VKD3D_SHADER_COMPARISON_FUNC_NOT_EQUAL] = {VKD3DSIH_NEO, VKD3DSIH_INE}, -+ [VKD3D_SHADER_COMPARISON_FUNC_GREATER_EQUAL] = 
{VKD3DSIH_GEO, VKD3DSIH_UGE}, -+ [VKD3D_SHADER_COMPARISON_FUNC_LESS] = {VKD3DSIH_LTO, VKD3DSIH_ULT}, -+ [VKD3D_SHADER_COMPARISON_FUNC_LESS_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE, true}, -+ [VKD3D_SHADER_COMPARISON_FUNC_GREATER] = {VKD3DSIH_LTO, VKD3DSIH_ULT, true}, -+ }; -+ -+ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) -+ { -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ ins = &program->instructions.elements[pos]; -+ -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; -+ src_param_init_const_uint(&ins->src[0], 0); -+ -+ *ret_pos = pos + 1; -+ return VKD3D_OK; -+ } -+ -+ if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ -+ ins = &program->instructions.elements[pos]; -+ -+ switch (ref->data_type) -+ { -+ case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: -+ vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2); -+ src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); -+ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); -+ break; -+ -+ case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: -+ vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); -+ src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); -+ src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], -+ VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); -+ break; -+ -+ default: -+ FIXME("Unhandled parameter data type %#x.\n", ref->data_type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ -+ dst_param_init_ssa_bool(&ins->dst[0], program->ssa_count); -+ ins->src[opcodes[compare_func].swap ? 
1 : 0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[opcodes[compare_func].swap ? 1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); -+ -+ ++ins; -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); -+ ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; -+ src_param_init_ssa_bool(&ins->src[0], program->ssa_count); -+ -+ ++program->ssa_count; -+ -+ ++ins; -+ vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); -+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); -+ ins->dst[0].reg.idx[0].offset = colour_signature_idx; -+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->dst[0].write_mask = program->output_signature.elements[colour_signature_idx].mask; -+ src_param_init_temp_float(&ins->src[0], colour_temp); -+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; -+ -+ *ret_pos = pos + 3; -+ return VKD3D_OK; -+} -+ -+static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, -+ struct vkd3d_shader_message_context *message_context) -+{ -+ const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; -+ static const struct vkd3d_shader_location no_loc; -+ enum vkd3d_shader_comparison_func compare_func; -+ uint32_t colour_signature_idx, colour_temp; -+ struct vkd3d_shader_instruction *ins; -+ size_t new_pos; -+ int ret; -+ -+ if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) -+ return VKD3D_OK; -+ -+ if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) -+ || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) -+ return VKD3D_OK; -+ -+ if (!(func = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC)) -+ || !(ref = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF))) -+ return VKD3D_OK; -+ -+ if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -+ { -+ 
vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, -+ "Unsupported alpha test function parameter type %#x.\n", func->type); -+ return VKD3D_ERROR_NOT_IMPLEMENTED; -+ } -+ if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) -+ { -+ vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, -+ "Invalid alpha test function parameter data type %#x.\n", func->data_type); -+ return VKD3D_ERROR_INVALID_ARGUMENT; -+ } -+ compare_func = func->u.immediate_constant.u.u32; -+ -+ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_ALWAYS) -+ return VKD3D_OK; -+ -+ /* We're going to be reading from the output, so we need to go -+ * through the whole shader and convert it to a temp. */ -+ -+ if (compare_func != VKD3D_SHADER_COMPARISON_FUNC_NEVER) -+ colour_temp = program->temp_count++; -+ -+ for (size_t i = 0; i < program->instructions.count; ++i) -+ { -+ ins = &program->instructions.elements[i]; -+ -+ if (vsir_instruction_is_dcl(ins)) -+ continue; -+ -+ if (ins->opcode == VKD3DSIH_RET) -+ { -+ if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, -+ ref, colour_signature_idx, colour_temp, &new_pos)) < 0) -+ return ret; -+ i = new_pos; -+ continue; -+ } -+ -+ /* No need to convert it if the comparison func is NEVER; we don't -+ * read from the output in that case. */ -+ if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) -+ continue; -+ -+ for (size_t j = 0; j < ins->dst_count; ++j) -+ { -+ struct vkd3d_shader_dst_param *dst = &ins->dst[j]; -+ -+ /* Note we run after I/O normalization. 
*/ -+ if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) -+ { -+ dst->reg.type = VKD3DSPR_TEMP; -+ dst->reg.idx[0].offset = colour_temp; -+ } -+ } -+ } -+ -+ return VKD3D_OK; -+} -+ - struct validation_context - { - struct vkd3d_shader_message_context *message_context; -@@ -5641,7 +6065,7 @@ static void vsir_validate_dst_count(struct validation_context *ctx, - if (instruction->dst_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, - "Invalid destination count %u for an instruction of type %#x, expected %u.", -- instruction->dst_count, instruction->handler_idx, count); -+ instruction->dst_count, instruction->opcode, count); - } - - static void vsir_validate_src_count(struct validation_context *ctx, -@@ -5650,7 +6074,7 @@ static void vsir_validate_src_count(struct validation_context *ctx, - if (instruction->src_count != count) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - } - - static bool vsir_validate_src_min_count(struct validation_context *ctx, -@@ -5660,7 +6084,7 @@ static bool vsir_validate_src_min_count(struct validation_context *ctx, - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at least %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, count); - return false; - } - -@@ -5674,7 +6098,7 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, - "Invalid source count %u for an instruction of type %#x, expected at most %u.", -- instruction->src_count, instruction->handler_idx, count); -+ instruction->src_count, instruction->opcode, 
count); - return false; - } - -@@ -5697,11 +6121,11 @@ static const char *name_from_cf_type(enum cf_type type) - static void vsir_validate_cf_type(struct validation_context *ctx, - const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) - { -- assert(ctx->cf_type != CF_TYPE_UNKNOWN); -- assert(expected_type != CF_TYPE_UNKNOWN); -+ VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); -+ VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); - if (ctx->cf_type != expected_type) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", -- instruction->handler_idx, name_from_cf_type(ctx->cf_type)); -+ instruction->opcode, name_from_cf_type(ctx->cf_type)); - } - - static void vsir_validate_instruction(struct validation_context *ctx) -@@ -5718,13 +6142,13 @@ static void vsir_validate_instruction(struct validation_context *ctx) - for (i = 0; i < instruction->src_count; ++i) - vsir_validate_src_param(ctx, &instruction->src[i]); - -- if (instruction->handler_idx >= VKD3DSIH_INVALID) -+ if (instruction->opcode >= VKD3DSIH_INVALID) - { - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", -- instruction->handler_idx); -+ instruction->opcode); - } - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_HS_DECLS: - case VKD3DSIH_HS_CONTROL_POINT_PHASE: -@@ -5733,12 +6157,14 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_dst_count(ctx, instruction, 0); - vsir_validate_src_count(ctx, instruction, 0); - if (version->type != VKD3D_SHADER_TYPE_HULL) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, -+ "Phase instruction %#x is only valid in a hull shader.", -+ instruction->opcode); - if (ctx->depth != 0) -- validator_error(ctx, 
VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.", -- instruction->handler_idx); -- ctx->phase = instruction->handler_idx; -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Phase instruction %#x must appear to top level.", -+ instruction->opcode); -+ ctx->phase = instruction->opcode; - ctx->dcl_temps_found = false; - return; - -@@ -5812,7 +6238,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - && ctx->phase == VKD3DSIH_INVALID) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, - "Instruction %#x appear before any phase instruction in a hull shader.", -- instruction->handler_idx); -+ instruction->opcode); - - /* We support two different control flow types in shaders: - * block-based, like DXIL and SPIR-V, and structured, like D3DBC -@@ -5824,7 +6250,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - * block, but need for that hasn't arisen yet, so we don't. 
*/ - if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) - { -- if (instruction->handler_idx == VKD3DSIH_LABEL) -+ if (instruction->opcode == VKD3DSIH_LABEL) - ctx->cf_type = CF_TYPE_BLOCKS; - else - ctx->cf_type = CF_TYPE_STRUCTURED; -@@ -5832,7 +6258,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - - if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) - { -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_LABEL: - if (ctx->inside_block) -@@ -5844,20 +6270,22 @@ static void vsir_validate_instruction(struct validation_context *ctx) - case VKD3DSIH_BRANCH: - case VKD3DSIH_SWITCH_MONOLITHIC: - if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid instruction %#x outside any block.", -+ instruction->opcode); - ctx->inside_block = false; - break; - - default: - if (!ctx->inside_block) -- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", -- instruction->handler_idx); -+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, -+ "Invalid instruction %#x outside any block.", -+ instruction->opcode); - break; - } - } - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_TEMPS: - vsir_validate_dst_count(ctx, instruction, 0); -@@ -5877,7 +6305,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_IFC: -@@ -5896,7 +6324,7 @@ static 
void vsir_validate_instruction(struct validation_context *ctx) - if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); - else -- ctx->blocks[ctx->depth - 1] = instruction->handler_idx; -+ ctx->blocks[ctx->depth - 1] = instruction->opcode; - break; - - case VKD3DSIH_ENDIF: -@@ -5915,7 +6343,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDLOOP: -@@ -5934,7 +6362,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDREP: -@@ -5953,7 +6381,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) - vsir_validate_src_count(ctx, instruction, 1); - if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) - return; -- ctx->blocks[ctx->depth++] = instruction->handler_idx; -+ ctx->blocks[ctx->depth++] = instruction->opcode; - break; - - case VKD3DSIH_ENDSWITCH: -@@ -6225,7 +6653,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - return result; - } - -- if ((result = vsir_program_normalise_io_registers(program)) < 0) -+ if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) - return result; - - if ((result = 
instruction_array_normalise_flat_constants(program)) < 0) -@@ -6241,6 +6669,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t - return result; - } - -+ if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) -+ return result; -+ - if (TRACE_ON()) - vkd3d_shader_trace(program); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h -index 4860cf5f90e..9806614a35b 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.h -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h -@@ -141,7 +141,7 @@ void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location - - static inline struct preproc_file *preproc_get_top_file(struct preproc_ctx *ctx) - { -- assert(ctx->file_count); -+ VKD3D_ASSERT(ctx->file_count); - return &ctx->file_stack[ctx->file_count - 1]; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l -index be50d3b9020..7fc963192cf 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/preproc.l -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l -@@ -20,6 +20,7 @@ - - %{ - -+#include "preproc.h" - #include "preproc.tab.h" - - #undef ERROR /* defined in wingdi.h */ -@@ -408,7 +409,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - } - ctx->last_was_eof = false; - -- assert(ctx->file_count); -+ VKD3D_ASSERT(ctx->file_count); - if (!(token = preproc_lexer_lex(lval, lloc, scanner))) - { - ctx->last_was_eof = true; -@@ -646,7 +647,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) - { - struct preproc_text *current_arg = NULL; - -- assert(func_state->macro->arg_count); -+ VKD3D_ASSERT(func_state->macro->arg_count); - - if (func_state->arg_count < func_state->macro->arg_count) - current_arg = &func_state->macro->arg_values[func_state->arg_count]; -diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y -index 009c35ffb97..366e351e3b5 100644 ---- 
a/libs/vkd3d/libs/vkd3d-shader/preproc.y -+++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y -@@ -119,7 +119,7 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati - macro->body.text = *body; - macro->body.location = *body_loc; - ret = rb_put(&ctx->macros, name, &macro->entry); -- assert(!ret); -+ VKD3D_ASSERT(!ret); - return true; - } - -diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c -index 984a4f894f6..ed37ac5c45e 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/spirv.c -+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c -@@ -313,7 +313,7 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, - struct vkd3d_spirv_chunk *chunk; - size_t src_location = 0; - -- assert(list_empty(&dst_stream->inserted_chunks)); -+ VKD3D_ASSERT(list_empty(&dst_stream->inserted_chunks)); - - LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) - src_word_count += chunk->word_count; -@@ -322,16 +322,16 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, - dst_stream->word_count + src_word_count, sizeof(*dst_stream->words))) - return false; - -- assert(dst_stream->word_count + src_word_count <= dst_stream->capacity); -+ VKD3D_ASSERT(dst_stream->word_count + src_word_count <= dst_stream->capacity); - LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) - { -- assert(src_location <= chunk->location); -+ VKD3D_ASSERT(src_location <= chunk->location); - word_count = chunk->location - src_location; - memcpy(&dst_stream->words[dst_stream->word_count], &src_stream->words[src_location], - word_count * sizeof(*src_stream->words)); - dst_stream->word_count += word_count; - src_location += word_count; -- assert(src_location == chunk->location); -+ VKD3D_ASSERT(src_location == chunk->location); - - memcpy(&dst_stream->words[dst_stream->word_count], chunk->words, - chunk->word_count * sizeof(*chunk->words)); -@@ -464,7 +464,7 
@@ static void vkd3d_spirv_set_execution_model(struct vkd3d_spirv_builder *builder, - - static uint32_t vkd3d_spirv_opcode_word(SpvOp op, unsigned int word_count) - { -- assert(!(op & ~SpvOpCodeMask)); -+ VKD3D_ASSERT(!(op & ~SpvOpCodeMask)); - return (word_count << SpvWordCountShift) | op; - } - -@@ -538,7 +538,7 @@ static int vkd3d_spirv_declaration_compare(const void *key, const struct rb_entr - return ret; - if ((ret = vkd3d_u32_compare(a->parameter_count, b->parameter_count))) - return ret; -- assert(a->parameter_count <= ARRAY_SIZE(a->parameters)); -+ VKD3D_ASSERT(a->parameter_count <= ARRAY_SIZE(a->parameters)); - return memcmp(&a->parameters, &b->parameters, a->parameter_count * sizeof(*a->parameters)); - } - -@@ -554,7 +554,7 @@ static void vkd3d_spirv_insert_declaration(struct vkd3d_spirv_builder *builder, - { - struct vkd3d_spirv_declaration *d; - -- assert(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); -+ VKD3D_ASSERT(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); - - if (!(d = vkd3d_malloc(sizeof(*d)))) - return; -@@ -823,7 +823,7 @@ static uint32_t vkd3d_spirv_build_op_tr2v(struct vkd3d_spirv_builder *builder, - static void vkd3d_spirv_begin_function_stream_insertion(struct vkd3d_spirv_builder *builder, - size_t location) - { -- assert(builder->insertion_location == ~(size_t)0); -+ VKD3D_ASSERT(builder->insertion_location == ~(size_t)0); - - if (vkd3d_spirv_stream_current_location(&builder->function_stream) == location) - return; -@@ -1166,7 +1166,7 @@ static uint32_t vkd3d_spirv_get_op_constant(struct vkd3d_spirv_builder *builder, - static uint32_t vkd3d_spirv_build_op_constant64(struct vkd3d_spirv_builder *builder, - uint32_t result_type, const uint32_t *values, unsigned int value_count) - { -- assert(value_count == 2); -+ VKD3D_ASSERT(value_count == 2); - return vkd3d_spirv_build_op_trv(builder, &builder->global_stream, - SpvOpConstant, result_type, values, value_count); - } -@@ -1583,13 +1583,13 @@ 
static uint32_t vkd3d_spirv_build_image_instruction(struct vkd3d_spirv_builder * - unsigned int index = 0, i; - uint32_t w[10]; - -- assert(operand_count <= ARRAY_SIZE(w)); -+ VKD3D_ASSERT(operand_count <= ARRAY_SIZE(w)); - for (i = 0; i < operand_count; ++i) - w[index++] = operands[i]; - - if (image_operands_mask) - { -- assert(index + 1 + image_operand_count <= ARRAY_SIZE(w)); -+ VKD3D_ASSERT(index + 1 + image_operand_count <= ARRAY_SIZE(w)); - w[index++] = image_operands_mask; - for (i = 0; i < image_operand_count; ++i) - w[index++] = image_operands[i]; -@@ -1606,9 +1606,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample(struct vkd3d_spirv_builder *bu - const uint32_t operands[] = {sampled_image_id, coordinate_id}; - - if (op == SpvOpImageSampleExplicitLod) -- assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); -+ VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); - else -- assert(op == SpvOpImageSampleImplicitLod); -+ VKD3D_ASSERT(op == SpvOpImageSampleImplicitLod); - - return vkd3d_spirv_build_image_instruction(builder, op, result_type, - operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); -@@ -1621,9 +1621,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample_dref(struct vkd3d_spirv_builde - const uint32_t operands[] = {sampled_image_id, coordinate_id, dref_id}; - - if (op == SpvOpImageSampleDrefExplicitLod) -- assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); -+ VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); - else -- assert(op == SpvOpImageSampleDrefImplicitLod); -+ VKD3D_ASSERT(op == SpvOpImageSampleDrefImplicitLod); - - return vkd3d_spirv_build_image_instruction(builder, op, result_type, - operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); -@@ -1752,6 +1752,22 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct 
vkd3d_spirv_builder *bu - return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); - } - -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_swap(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t op_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadSwap, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, op_id); -+} -+ -+static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(struct vkd3d_spirv_builder *builder, -+ uint32_t result_type, uint32_t val_id, uint32_t index_id) -+{ -+ vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); -+ return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadBroadcast, result_type, -+ vkd3d_spirv_get_op_scope_subgroup(builder), val_id, index_id); -+} -+ - static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, - uint32_t result_type, uint32_t val_id) - { -@@ -1884,7 +1900,7 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, - } - else - { -- assert(component_type != VKD3D_SHADER_COMPONENT_VOID); -+ VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); - scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); - return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); - } -@@ -2140,6 +2156,8 @@ struct vkd3d_symbol_descriptor_array - unsigned int set; - unsigned int binding; - unsigned int push_constant_index; -+ bool write_only; -+ bool coherent; - }; - - struct vkd3d_symbol_register_data -@@ -2250,7 +2268,7 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, - case VKD3DSPR_OUTPUT: - case VKD3DSPR_PATCHCONST: - symbol->key.reg.idx = reg->idx_count ? 
reg->idx[reg->idx_count - 1].offset : ~0u; -- assert(!reg->idx_count || symbol->key.reg.idx != ~0u); -+ VKD3D_ASSERT(!reg->idx_count || symbol->key.reg.idx != ~0u); - break; - - case VKD3DSPR_IMMCONSTBUFFER: -@@ -2377,6 +2395,7 @@ struct ssa_register_info - struct spirv_compiler - { - struct vkd3d_spirv_builder spirv_builder; -+ const struct vsir_program *program; - - struct vkd3d_shader_message_context *message_context; - struct vkd3d_shader_location location; -@@ -2403,6 +2422,11 @@ struct spirv_compiler - struct vkd3d_push_constant_buffer_binding *push_constants; - const struct vkd3d_shader_spirv_target_info *spirv_target_info; - -+ struct -+ { -+ uint32_t buffer_id; -+ } *spirv_parameter_info; -+ - bool prolog_emitted; - struct shader_signature input_signature; - struct shader_signature output_signature; -@@ -2490,6 +2514,8 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) - vkd3d_free(compiler->push_constants); - vkd3d_free(compiler->descriptor_offset_ids); - -+ vkd3d_free(compiler->spirv_parameter_info); -+ - vkd3d_spirv_builder_free(&compiler->spirv_builder); - - rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); -@@ -2513,13 +2539,10 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p - const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, - struct vkd3d_shader_message_context *message_context, uint64_t config_flags) - { -- const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; -- const struct shader_signature *output_signature = &program->output_signature; - const struct vkd3d_shader_interface_info *shader_interface; - const struct vkd3d_shader_descriptor_offset_info *offset_info; - const struct vkd3d_shader_spirv_target_info *target_info; - struct spirv_compiler *compiler; -- unsigned int max_element_count; - unsigned int i; - - if (!(compiler = vkd3d_malloc(sizeof(*compiler)))) -@@ -2547,13 +2570,6 @@ static struct spirv_compiler 
*spirv_compiler_create(const struct vsir_program *p - compiler->spirv_target_info = target_info; - } - -- max_element_count = max(output_signature->element_count, patch_constant_signature->element_count); -- if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -- { -- vkd3d_free(compiler); -- return NULL; -- } -- - vkd3d_spirv_builder_init(&compiler->spirv_builder, spirv_compiler_get_entry_point_name(compiler)); - - compiler->formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT -@@ -2893,7 +2909,7 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind - - if (is_uav_counter) - { -- assert(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); -+ VKD3D_ASSERT(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); - binding_offsets = compiler->offset_info.uav_counter_offsets; - for (i = 0; i < shader_interface->uav_counter_count; ++i) - { -@@ -3011,7 +3027,7 @@ static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int i; - -- assert(0 < component_count && component_count <= VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_VEC4_SIZE); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - - switch (component_type) -@@ -3052,7 +3068,7 @@ static uint32_t spirv_compiler_get_constant64(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int i; - -- assert(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); -+ VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - - if (component_type != VKD3D_SHADER_COMPONENT_DOUBLE && component_type != VKD3D_SHADER_COMPONENT_UINT64) -@@ -3274,21 +3290,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil - 
return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); - } - --static const struct vkd3d_shader_parameter *spirv_compiler_get_shader_parameter( -- struct spirv_compiler *compiler, enum vkd3d_shader_parameter_name name) --{ -- const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; -- unsigned int i; -- -- for (i = 0; info && i < info->parameter_count; ++i) -- { -- if (info->parameters[i].name == name) -- return &info->parameters[i]; -- } -- -- return NULL; --} -- - static const struct vkd3d_spec_constant_info - { - enum vkd3d_shader_parameter_name name; -@@ -3298,6 +3299,7 @@ static const struct vkd3d_spec_constant_info - vkd3d_shader_parameters[] = - { - {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, -+ {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, - }; - - static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) -@@ -3318,12 +3320,11 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com - { - if (!compiler->current_spec_constant_id) - { -- const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - unsigned int i, id = 0; - -- for (i = 0; info && i < info->parameter_count; ++i) -+ for (i = 0; i < compiler->program->parameter_count; ++i) - { -- const struct vkd3d_shader_parameter *current = &info->parameters[i]; -+ const struct vkd3d_shader_parameter1 *current = &compiler->program->parameters[i]; - - if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) - id = max(current->u.specialization_constant.id + 1, id); -@@ -3336,7 +3337,7 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com - } - - static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name, uint32_t spec_id) -+ enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type 
type) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - const struct vkd3d_spec_constant_info *info; -@@ -3345,7 +3346,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile - info = get_spec_constant_info(name); - default_value = info ? info->default_value : 0; - -- type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); -+ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); - id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); - vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); - -@@ -3364,7 +3365,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile - } - - static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name, uint32_t spec_id) -+ enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) - { - unsigned int i; - -@@ -3374,30 +3375,66 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler - return compiler->spec_constants[i].id; - } - -- return spirv_compiler_emit_spec_constant(compiler, name, spec_id); -+ return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type); -+} -+ -+static uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ unsigned int index = parameter - compiler->program->parameters; -+ uint32_t type_id, ptr_id, ptr_type_id; -+ -+ type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); -+ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); -+ ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, -+ compiler->spirv_parameter_info[index].buffer_id, -+ 
spirv_compiler_get_constant_uint(compiler, 0)); -+ return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); - } - --static uint32_t spirv_compiler_emit_uint_shader_parameter(struct spirv_compiler *compiler, -- enum vkd3d_shader_parameter_name name) -+static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, -+ enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type) - { -- const struct vkd3d_shader_parameter *parameter; -+ const struct vkd3d_shader_parameter1 *parameter; - -- if (!(parameter = spirv_compiler_get_shader_parameter(compiler, name))) -+ static const struct -+ { -+ enum vkd3d_data_type type; -+ } -+ type_map[] = -+ { -+ [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT}, -+ [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT}, -+ }; -+ -+ if (!(parameter = vsir_program_get_parameter(compiler->program, name))) - { - WARN("Unresolved shader parameter %#x.\n", name); - goto default_parameter; - } - -+ if (type_map[parameter->data_type].type != type) -+ ERR("Expected data type %#x for parameter %#x, got %#x.\n", type, name, parameter->data_type); -+ - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) -- return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); -+ { -+ if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) -+ return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32); -+ else -+ return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); -+ } -+ - if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) -- return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id); -+ return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type); -+ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) -+ return 
spirv_compiler_get_buffer_parameter(compiler, parameter, type); - - FIXME("Unhandled parameter type %#x.\n", parameter->type); - - default_parameter: - return spirv_compiler_get_spec_constant(compiler, -- name, spirv_compiler_alloc_spec_constant_id(compiler)); -+ name, spirv_compiler_alloc_spec_constant_id(compiler), type); - } - - static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, -@@ -3409,7 +3446,7 @@ static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *comp - uint32_t type_id, result_id; - unsigned int i; - -- assert(val_component_idx < val_component_count); -+ VKD3D_ASSERT(val_component_idx < val_component_count); - - type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); - if (val_component_count == 1) -@@ -3470,11 +3507,11 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, - struct vkd3d_symbol reg_symbol, *symbol; - struct rb_entry *entry; - -- assert(!register_is_constant_or_undef(reg)); -+ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); - - if (reg->type == VKD3DSPR_TEMP) - { -- assert(reg->idx[0].offset < compiler->temp_count); -+ VKD3D_ASSERT(reg->idx[0].offset < compiler->temp_count); - register_info->id = compiler->temp_id + reg->idx[0].offset; - register_info->storage_class = SpvStorageClassPrivate; - register_info->descriptor_array = NULL; -@@ -3605,7 +3642,7 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp - - if (reg->type == VKD3DSPR_CONSTBUFFER) - { -- assert(!reg->idx[0].rel_addr); -+ VKD3D_ASSERT(!reg->idx[0].rel_addr); - if (register_info->descriptor_array) - indexes[index_count++] = spirv_compiler_get_descriptor_index(compiler, reg, - register_info->descriptor_array, register_info->binding_base_idx, VKD3D_SHADER_RESOURCE_BUFFER); -@@ -3723,7 +3760,7 @@ static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, - { - if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) - { -- 
assert(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); -+ VKD3D_ASSERT(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); - components[component_idx++] = val_id; - } - } -@@ -3748,7 +3785,7 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil - uint32_t type_id; - unsigned int i; - -- assert(component_count <= ARRAY_SIZE(components)); -+ VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); - - for (i = 0; i < component_count; ++i) - { -@@ -3771,7 +3808,7 @@ static uint32_t spirv_compiler_emit_int_to_bool(struct spirv_compiler *compiler, - uint32_t type_id; - SpvOp op; - -- assert(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); -+ VKD3D_ASSERT(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); - op = condition & VKD3D_SHADER_CONDITIONAL_OP_Z ? 
SpvOpIEqual : SpvOpINotEqual; -@@ -3901,7 +3938,7 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile - uint32_t values[VKD3D_VEC4_SIZE] = {0}; - unsigned int i, j; - -- assert(reg->type == VKD3DSPR_IMMCONST); -+ VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST); - - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { -@@ -3929,7 +3966,7 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi - uint64_t values[VKD3D_DVEC2_SIZE] = {0}; - unsigned int i, j; - -- assert(reg->type == VKD3DSPR_IMMCONST64); -+ VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST64); - - if (reg->dimension == VSIR_DIMENSION_SCALAR) - { -@@ -3956,7 +3993,7 @@ static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - uint32_t type_id; - -- assert(reg->type == VKD3DSPR_UNDEF); -+ VKD3D_ASSERT(reg->type == VKD3DSPR_UNDEF); - - type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); - return vkd3d_spirv_get_op_undef(builder, type_id); -@@ -3972,8 +4009,8 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type; - uint32_t skipped_component_mask; - -- assert(!register_is_constant_or_undef(reg)); -- assert(vsir_write_mask_component_count(write_mask) == 1); -+ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); -+ VKD3D_ASSERT(vsir_write_mask_component_count(write_mask) == 1); - - component_idx = vsir_write_mask_get_component_idx(write_mask); - component_idx = vsir_swizzle_get_component(swizzle, component_idx); -@@ -4096,8 +4133,8 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil - static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg) - { -- assert(reg->idx[0].offset < compiler->ssa_register_count); -- 
assert(reg->idx_count == 1); -+ VKD3D_ASSERT(reg->idx[0].offset < compiler->ssa_register_count); -+ VKD3D_ASSERT(reg->idx_count == 1); - return &compiler->ssa_register_info[reg->idx[0].offset]; - } - -@@ -4105,7 +4142,7 @@ static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *co - const struct vkd3d_shader_register *reg, uint32_t val_id) - { - unsigned int i = reg->idx[0].offset; -- assert(i < compiler->ssa_register_count); -+ VKD3D_ASSERT(i < compiler->ssa_register_count); - compiler->ssa_register_info[i].data_type = reg->data_type; - compiler->ssa_register_info[i].id = val_id; - } -@@ -4125,10 +4162,10 @@ static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler - if (!val_id) - { - /* Should only be from a missing instruction implementation. */ -- assert(compiler->failed); -+ VKD3D_ASSERT(compiler->failed); - return 0; - } -- assert(vkd3d_swizzle_is_scalar(swizzle, reg)); -+ VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); - - reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); - -@@ -4172,6 +4209,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, - return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); - else if (reg->type == VKD3DSPR_UNDEF) - return spirv_compiler_emit_load_undef(compiler, reg, write_mask); -+ else if (reg->type == VKD3DSPR_PARAMETER) -+ return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type); - - component_count = vsir_write_mask_component_count(write_mask); - component_type = vkd3d_component_type_from_data_type(reg->data_type); -@@ -4348,7 +4387,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, - unsigned int i, src_idx, dst_idx; - uint32_t type_id, dst_val_id; - -- assert(write_mask); -+ VKD3D_ASSERT(write_mask); - - component_count = vsir_write_mask_component_count(write_mask); - dst_component_count = vsir_write_mask_component_count(dst_write_mask); -@@ 
-4373,7 +4412,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, - type_id = vkd3d_spirv_get_type_id(builder, component_type, dst_component_count); - dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone); - -- assert(component_count <= ARRAY_SIZE(components)); -+ VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); - - for (i = 0, src_idx = 0, dst_idx = 0; dst_idx < VKD3D_VEC4_SIZE; ++dst_idx) - { -@@ -4402,7 +4441,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, - uint32_t src_write_mask = write_mask; - uint32_t type_id; - -- assert(!register_is_constant_or_undef(reg)); -+ VKD3D_ASSERT(!register_is_constant_or_undef(reg)); - - if (reg->type == VKD3DSPR_SSA) - { -@@ -4461,7 +4500,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, - static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, - const struct vkd3d_shader_dst_param *dst, uint32_t val_id) - { -- assert(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); -+ VKD3D_ASSERT(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); - if (dst->modifiers & VKD3DSPDM_SATURATE) - val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); - -@@ -4893,7 +4932,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler - { - struct vkd3d_shader_register r; - -- assert(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); -+ VKD3D_ASSERT(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); - - vsir_register_init(&r, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); - return spirv_compiler_get_register_id(compiler, &r); -@@ -5013,7 +5052,7 @@ static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *co - unsigned int sizes[2]; - uint32_t id; - -- assert(size_count <= ARRAY_SIZE(sizes)); -+ VKD3D_ASSERT(size_count <= ARRAY_SIZE(sizes)); - memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); - array_sizes = sizes; - sizes[0] = max(sizes[0], 
builtin->spirv_array_size); -@@ -5175,7 +5214,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, - use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_var ? VKD3DSP_WRITEMASK_ALL : reg_write_mask); - reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; -- assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); -+ VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); - spirv_compiler_put_symbol(compiler, ®_symbol); - - vkd3d_spirv_build_op_name(builder, var_id, reg_type == VKD3DSPR_PATCHCONST ? "vpc%u" : "v%u", element_idx); -@@ -5221,8 +5260,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, - uint32_t write_mask; - uint32_t input_id; - -- assert(!reg->idx_count || !reg->idx[0].rel_addr); -- assert(reg->idx_count < 2); -+ VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); -+ VKD3D_ASSERT(reg->idx_count < 2); - - if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { -@@ -5356,8 +5395,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, - uint32_t write_mask; - uint32_t output_id; - -- assert(!reg->idx_count || !reg->idx[0].rel_addr); -- assert(reg->idx_count < 2); -+ VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); -+ VKD3D_ASSERT(reg->idx_count < 2); - - if (!(builtin = get_spirv_builtin_for_register(reg->type))) - { -@@ -5543,7 +5582,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, - use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, - use_private_variable ? 
VKD3DSP_WRITEMASK_ALL : reg_write_mask); - reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; -- assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); -+ VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); - - spirv_compiler_put_symbol(compiler, ®_symbol); - -@@ -5881,7 +5920,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t - function_location = spirv_compiler_get_current_function_location(compiler); - vkd3d_spirv_begin_function_stream_insertion(builder, function_location); - -- assert(!compiler->temp_count); -+ VKD3D_ASSERT(!compiler->temp_count); - compiler->temp_count = count; - for (i = 0; i < compiler->temp_count; ++i) - { -@@ -5889,7 +5928,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t - SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); - if (!i) - compiler->temp_id = id; -- assert(id == compiler->temp_id + i); -+ VKD3D_ASSERT(id == compiler->temp_id + i); - - vkd3d_spirv_build_op_name(builder, id, "r%u", i); - } -@@ -5899,7 +5938,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t - - static void spirv_compiler_allocate_ssa_register_ids(struct spirv_compiler *compiler, unsigned int count) - { -- assert(!compiler->ssa_register_info); -+ VKD3D_ASSERT(!compiler->ssa_register_info); - if (!(compiler->ssa_register_info = vkd3d_calloc(count, sizeof(*compiler->ssa_register_info)))) - { - ERR("Failed to allocate SSA register value id array, count %u.\n", count); -@@ -6001,7 +6040,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com - vkd3d_spirv_build_op_decorate1(builder, member_ids[j], SpvDecorationArrayStride, 4); - descriptor_offsets_member_idx = j; - compiler->descriptor_offsets_member_id = spirv_compiler_get_constant_uint(compiler, j); -- assert(j == count - 1); -+ 
VKD3D_ASSERT(j == count - 1); - } - - struct_id = vkd3d_spirv_build_op_type_struct(builder, member_ids, count); -@@ -6041,21 +6080,54 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com - } - } - -+static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( -+ struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, -+ const struct vkd3d_shader_register_range *range) -+{ -+ const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; -+ unsigned int register_last = (range->last == ~0u) ? range->first : range->last; -+ const struct vkd3d_shader_descriptor_info1 *d; -+ unsigned int i; -+ -+ for (i = 0; i < descriptor_info->descriptor_count; ++i) -+ { -+ d = &descriptor_info->descriptors[i]; -+ if (d->type == type && d->register_space == range->space && d->register_index <= range->first -+ && (d->count == ~0u || d->count > register_last - d->register_index)) -+ return d; -+ } -+ -+ return NULL; -+} -+ - struct vkd3d_descriptor_variable_info - { - const struct vkd3d_symbol *array_symbol; - unsigned int binding_base_idx; - }; - -+static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, -+ uint32_t var_id, bool write_only, bool coherent) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ -+ if (write_only) -+ vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); -+ if (coherent) -+ vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); -+} -+ - static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, - SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, - const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, -- bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) -+ bool is_uav, bool is_uav_counter, struct 
vkd3d_descriptor_variable_info *var_info) - { - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_descriptor_binding_address binding_address; - struct vkd3d_shader_descriptor_binding binding; -+ const struct vkd3d_shader_descriptor_info1 *d; - uint32_t array_type_id, ptr_type_id, var_id; -+ bool write_only = false, coherent = false; - struct vkd3d_symbol symbol; - struct rb_entry *entry; - -@@ -6063,6 +6135,14 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - resource_type, is_uav_counter, &binding_address); - var_info->binding_base_idx = binding_address.binding_base_idx; - -+ if (is_uav) -+ { -+ d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); -+ write_only = !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); -+ /* ROVs are implicitly globally coherent. */ -+ coherent = d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); -+ } -+ - if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u - && binding_address.push_constant_index == ~0u) - { -@@ -6072,6 +6152,7 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - - spirv_compiler_emit_descriptor_binding(compiler, var_id, &binding); - spirv_compiler_emit_register_debug_name(builder, var_id, reg); -+ spirv_compiler_decorate_descriptor(compiler, var_id, write_only, coherent); - - var_info->array_symbol = NULL; - return var_id; -@@ -6089,6 +6170,8 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - symbol.key.descriptor_array.set = binding.set; - symbol.key.descriptor_array.binding = binding.binding; - symbol.key.descriptor_array.push_constant_index = binding_address.push_constant_index; -+ symbol.key.descriptor_array.write_only = write_only; -+ symbol.key.descriptor_array.coherent = coherent; - if ((entry = rb_get(&compiler->symbol_table, &symbol))) - { - 
var_info->array_symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); -@@ -6099,6 +6182,7 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * - ptr_type_id, storage_class, 0); - spirv_compiler_emit_descriptor_binding(compiler, var_id, &binding); - spirv_compiler_emit_register_debug_name(builder, var_id, reg); -+ spirv_compiler_decorate_descriptor(compiler, var_id, write_only, coherent); - - symbol.id = var_id; - symbol.descriptor_array = NULL; -@@ -6155,7 +6239,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); - - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, -- ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); -+ ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, false, &var_info); - - vkd3d_symbol_make_register(®_symbol, ®); - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -@@ -6212,7 +6296,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi - - type_id = vkd3d_spirv_get_op_type_sampler(builder); - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, -- range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); -+ range, VKD3D_SHADER_RESOURCE_NONE, false, false, &var_info); - - vkd3d_symbol_make_register(®_symbol, ®); - vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, -@@ -6259,26 +6343,6 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty - } - } - --static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( -- struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, -- const struct vkd3d_shader_register_range *range) --{ -- const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; -- unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; -- const struct vkd3d_shader_descriptor_info1 *d; -- unsigned int i; -- -- for (i = 0; i < descriptor_info->descriptor_count; ++i) -- { -- d = &descriptor_info->descriptors[i]; -- if (d->type == type && d->register_space == range->space && d->register_index <= range->first -- && (d->count == ~0u || d->count > register_last - d->register_index)) -- return d; -- } -- -- return NULL; --} -- - static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, - const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, -@@ -6457,7 +6521,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - } - - var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, -- range, resource_type, false, &var_info); -+ range, resource_type, is_uav, false, &var_info); - - if (is_uav) - { -@@ -6465,13 +6529,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - - d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - -- if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) -- vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); -- -- /* ROVs are implicitly globally coherent. 
*/ -- if (d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW)) -- vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); -- - if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) - { - if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) -@@ -6488,7 +6545,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - - if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) - { -- assert(structure_stride); /* counters are valid only for structured buffers */ -+ VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ - - counter_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - if (spirv_compiler_is_opengl_target(compiler)) -@@ -6514,7 +6571,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp - } - - counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, -- type_id, ®, range, resource_type, true, &counter_var_info); -+ type_id, ®, range, resource_type, false, true, &counter_var_info); - } - } - -@@ -6831,7 +6888,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - uint32_t function_id, void_id, function_type_id; - struct vkd3d_shader_phase *phase; - -- assert(compiler->phase != instruction->handler_idx); -+ VKD3D_ASSERT(compiler->phase != instruction->opcode); - - if (!is_in_default_phase(compiler)) - spirv_compiler_leave_shader_phase(compiler); -@@ -6843,16 +6900,16 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_function(builder, void_id, function_id, - SpvFunctionControlMaskNone, function_type_id); - -- compiler->phase = instruction->handler_idx; -+ compiler->phase = instruction->opcode; - spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); - -- phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ phase = (instruction->opcode == 
VKD3DSIH_HS_CONTROL_POINT_PHASE) - ? &compiler->control_point_phase : &compiler->patch_constant_phase; - phase->function_id = function_id; - /* The insertion location must be set after the label is emitted. */ - phase->function_location = 0; - -- if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) -+ if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) - compiler->emit_default_control_point_phase = instruction->flags; - } - -@@ -6908,7 +6965,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile - input_reg.idx[1].offset = 0; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); - -- assert(input_signature->element_count == output_signature->element_count); -+ VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *output = &output_signature->elements[i]; -@@ -6916,8 +6973,8 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile - struct vkd3d_shader_register_info output_reg_info; - struct vkd3d_shader_register output_reg; - -- assert(input->mask == output->mask); -- assert(input->component_type == output->component_type); -+ VKD3D_ASSERT(input->mask == output->mask); -+ VKD3D_ASSERT(input->component_type == output->component_type); - - input_reg.idx[1].offset = i; - input_id = spirv_compiler_get_register_id(compiler, &input_reg); -@@ -7016,7 +7073,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp spirv_op; - } - alu_ops[] = -@@ -7056,7 +7113,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - - for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) - { -- if (alu_ops[i].handler_idx == instruction->handler_idx) -+ if (alu_ops[i].opcode == instruction->opcode) - return 
alu_ops[i].spirv_op; - } - -@@ -7065,7 +7122,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru - - static SpvOp spirv_compiler_map_logical_instruction(const struct vkd3d_shader_instruction *instruction) - { -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_AND: - return SpvOpLogicalAnd; -@@ -7085,25 +7142,25 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, - const struct vkd3d_shader_src_param *src = instruction->src; - uint32_t val_id; - -- assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); -+ VKD3D_ASSERT(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); - - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) - { -- val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); -+ val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); - } - else if (dst->reg.data_type == VKD3D_DATA_DOUBLE) - { - /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. 
*/ -- val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); -+ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); - } - else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) - { -- val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); -+ val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); - } - else if (dst->reg.data_type == VKD3D_DATA_UINT64) - { -- val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); -+ val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); - } - else - { -@@ -7126,7 +7183,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - SpvOp op = SpvOpMax; - unsigned int i; - -- if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) -+ if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->opcode == VKD3DSIH_COUNTBITS) - { - /* At least some drivers support this anyway, but if validation is enabled it will fail. */ - FIXME("Unsupported 64-bit source for bit count.\n"); -@@ -7142,8 +7199,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - /* VSIR supports logic ops AND/OR/XOR on bool values. 
*/ - op = spirv_compiler_map_logical_instruction(instruction); - } -- else if (instruction->handler_idx == VKD3DSIH_ITOF || instruction->handler_idx == VKD3DSIH_UTOF -- || instruction->handler_idx == VKD3DSIH_ITOI || instruction->handler_idx == VKD3DSIH_UTOU) -+ else if (instruction->opcode == VKD3DSIH_ITOF || instruction->opcode == VKD3DSIH_UTOF -+ || instruction->opcode == VKD3DSIH_ITOI || instruction->opcode == VKD3DSIH_UTOU) - { - /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, - * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. */ -@@ -7158,14 +7215,14 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - - if (op == SpvOpMax) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, -- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); -+ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); - return VKD3D_ERROR_INVALID_SHADER; - } - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); - - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - -@@ -7179,8 +7236,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil - * Microsoft fxc will compile immediate constants larger than 5 bits. - * Fixing up the constants would be more elegant, but the simplest way is - * to let this handle constants too. 
*/ -- if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->handler_idx == VKD3DSIH_ISHL -- || instruction->handler_idx == VKD3DSIH_ISHR || instruction->handler_idx == VKD3DSIH_USHR)) -+ if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->opcode == VKD3DSIH_ISHL -+ || instruction->opcode == VKD3DSIH_ISHR || instruction->opcode == VKD3DSIH_USHR)) - { - uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, - VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); -@@ -7218,7 +7275,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - enum GLSLstd450 glsl_inst; - } - glsl_insts[] = -@@ -7258,7 +7315,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( - - for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) - { -- if (glsl_insts[i].handler_idx == instruction->handler_idx) -+ if (glsl_insts[i].opcode == instruction->opcode) - return glsl_insts[i].glsl_inst; - } - -@@ -7276,27 +7333,27 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - unsigned int i, component_count; - enum GLSLstd450 glsl_inst; - -- if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) -+ if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->opcode == VKD3DSIH_FIRSTBIT_HI -+ || instruction->opcode == VKD3DSIH_FIRSTBIT_LO || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI)) - { - /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ -- FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); -+ FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -- "64-bit source for handler %#x is not supported.", instruction->handler_idx); -+ "64-bit source for handler %#x is not supported.", instruction->opcode); - return; - } - - glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); - if (glsl_inst == GLSLstd450Bad) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - - instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); - - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - -@@ -7306,8 +7363,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp - val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, - instr_set_id, glsl_inst, src_id, instruction->src_count); - -- if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI -- || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) -+ if (instruction->opcode == VKD3DSIH_FIRSTBIT_HI -+ || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI) - { - /* In D3D bits are numbered from the most significant bit. 
*/ - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -7415,7 +7472,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, - - if (src[0].reg.data_type != VKD3D_DATA_BOOL) - { -- if (instruction->handler_idx == VKD3DSIH_CMP) -+ if (instruction->opcode == VKD3DSIH_CMP) - condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, - vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, - spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); -@@ -7437,7 +7494,7 @@ static void spirv_compiler_emit_swapc(struct spirv_compiler *compiler, - uint32_t condition_id, src1_id, src2_id, type_id, val_id; - unsigned int component_count; - -- assert(dst[0].write_mask == dst[1].write_mask); -+ VKD3D_ASSERT(dst[0].write_mask == dst[1].write_mask); - - condition_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); - src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); -@@ -7469,14 +7526,14 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, - component_count = vsir_write_mask_component_count(dst->write_mask); - component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - -- if (instruction->handler_idx == VKD3DSIH_DP4) -+ if (instruction->opcode == VKD3DSIH_DP4) - write_mask = VKD3DSP_WRITEMASK_ALL; -- else if (instruction->handler_idx == VKD3DSIH_DP3) -+ else if (instruction->opcode == VKD3DSIH_DP3) - write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; - else - write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; - -- assert(instruction->src_count == ARRAY_SIZE(src_ids)); -+ VKD3D_ASSERT(instruction->src_count == ARRAY_SIZE(src_ids)); - for (i = 0; i < ARRAY_SIZE(src_ids); ++i) - src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], write_mask); - -@@ -7606,8 +7663,8 @@ static void spirv_compiler_emit_int_div(struct 
spirv_compiler *compiler, - unsigned int component_count = 0; - SpvOp div_op, mod_op; - -- div_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; -- mod_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; -+ div_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; -+ mod_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; - - if (dst[0].reg.type != VKD3DSPR_NULL) - { -@@ -7668,8 +7725,8 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, - enum vkd3d_shader_component_type component_type; - unsigned int component_count; - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count == 1); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count == 1); - - /* OpConvertFToI has undefined results if the result cannot be represented - * as a signed integer, but Direct3D expects the result to saturate, -@@ -7721,8 +7778,8 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, - uint32_t src_type_id, dst_type_id, condition_type_id; - unsigned int component_count; - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count == 1); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count == 1); - - /* OpConvertFToU has undefined results if the result cannot be represented - * as an unsigned integer, but Direct3D expects the result to saturate, -@@ -7770,7 +7827,7 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp - SpvOp op; - - src_count = instruction->src_count; -- assert(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); -+ VKD3D_ASSERT(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); - - component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); -@@ -7778,17 +7835,17 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp - mask_id = 
spirv_compiler_get_constant_uint(compiler, size - 1); - size_id = spirv_compiler_get_constant_uint(compiler, size); - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; - case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; - case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, k = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -7832,7 +7889,7 @@ static void spirv_compiler_emit_f16tof32(struct spirv_compiler *compiler, - scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); - - /* FIXME: Consider a single UnpackHalf2x16 instruction per 2 components. */ -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -7866,7 +7923,7 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, - zero_id = spirv_compiler_get_constant_float(compiler, 0.0f); - - /* FIXME: Consider a single PackHalf2x16 instruction per 2 components. 
*/ -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -7895,7 +7952,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - unsigned int component_count; - SpvOp op; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DEQO: - case VKD3DSIH_EQO: op = SpvOpFOrdEqual; break; -@@ -7916,7 +7973,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co - case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; - case VKD3DSIH_ULT: op = SpvOpULessThan; break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -7949,7 +8006,7 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c - src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); - src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); - val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); -- if (instruction->handler_idx == VKD3DSIH_ORD) -+ if (instruction->opcode == VKD3DSIH_ORD) - val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } -@@ -7964,7 +8021,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil - unsigned int component_count; - SpvOp op; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; - case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; -@@ -8113,6 +8170,8 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, - if (src->reg.data_type != VKD3D_DATA_BOOL) - condition_id = spirv_compiler_emit_int_to_bool(compiler, - instruction->flags, 
src->reg.data_type, 1, condition_id); -+ else if (instruction->flags & VKD3D_SHADER_CONDITIONAL_OP_Z) -+ condition_id = vkd3d_spirv_build_op_logical_not(builder, vkd3d_spirv_get_op_type_bool(builder), condition_id); - void_id = vkd3d_spirv_get_op_type_void(builder); - vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), - &condition_id, 1); -@@ -8262,7 +8321,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - - static const struct instruction_info - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp op; - bool needs_derivative_control; - } -@@ -8279,7 +8338,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - info = NULL; - for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) - { -- if (deriv_instructions[i].handler_idx == instruction->handler_idx) -+ if (deriv_instructions[i].opcode == instruction->opcode) - { - info = &deriv_instructions[i]; - break; -@@ -8287,15 +8346,15 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile - } - if (!info) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - - if (info->needs_derivative_control) - vkd3d_spirv_enable_capability(builder, SpvCapabilityDerivativeControl); - -- assert(instruction->dst_count == 1); -- assert(instruction->src_count == 1); -+ VKD3D_ASSERT(instruction->dst_count == 1); -+ VKD3D_ASSERT(instruction->src_count == 1); - - type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); - src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -@@ -8329,7 +8388,7 @@ static const struct vkd3d_symbol *spirv_compiler_find_resource(struct spirv_comp - - vkd3d_symbol_make_resource(&resource_key, resource_reg); - entry = rb_get(&compiler->symbol_table, &resource_key); -- assert(entry); -+ VKD3D_ASSERT(entry); - return 
RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - } - -@@ -8438,8 +8497,8 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, - { - struct vkd3d_shader_register_info register_info; - -- assert(image->image_id); -- assert(sampler_reg); -+ VKD3D_ASSERT(image->image_id); -+ VKD3D_ASSERT(sampler_reg); - - if (!spirv_compiler_get_register_info(compiler, sampler_reg, ®ister_info)) - ERR("Failed to get sampler register info.\n"); -@@ -8497,7 +8556,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, - uint32_t coordinate_mask; - bool multisample; - -- multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; -+ multisample = instruction->opcode == VKD3DSIH_LD2DMS; - - spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); - -@@ -8522,7 +8581,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, - image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, - &src[2], VKD3DSP_WRITEMASK_0); - } -- assert(image_operand_count <= ARRAY_SIZE(image_operands)); -+ VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); - val_id = vkd3d_spirv_build_op_image_fetch(builder, type_id, - image.image_id, coordinate_id, operands_mask, image_operands, image_operand_count); - -@@ -8576,7 +8635,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - spirv_compiler_prepare_image(compiler, &image, - &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_SAMPLE: - op = SpvOpImageSampleImplicitLod; -@@ -8603,7 +8662,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, - &src[3], VKD3DSP_WRITEMASK_0); - break; - default: -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -8616,7 +8675,7 @@ static void 
spirv_compiler_emit_sample(struct spirv_compiler *compiler, - - sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); - coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); -- assert(image_operand_count <= ARRAY_SIZE(image_operands)); -+ VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); - val_id = vkd3d_spirv_build_op_image_sample(builder, op, sampled_type_id, - image.sampled_image_id, coordinate_id, operands_mask, image_operands, image_operand_count); - -@@ -8637,7 +8696,7 @@ static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, - uint32_t image_operands[2]; - SpvOp op; - -- if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) -+ if (instruction->opcode == VKD3DSIH_SAMPLE_C_LZ) - { - op = SpvOpImageSampleDrefExplicitLod; - operands_mask |= SpvImageOperandsLodMask; -@@ -8687,12 +8746,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, - uint32_t coordinate_mask; - bool extended_offset; - -- if (instruction->handler_idx == VKD3DSIH_GATHER4_C -- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C) -+ if (instruction->opcode == VKD3DSIH_GATHER4_C -+ || instruction->opcode == VKD3DSIH_GATHER4_PO_C) - image_flags |= VKD3D_IMAGE_FLAG_DEPTH; - -- extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO -- || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; -+ extended_offset = instruction->opcode == VKD3DSIH_GATHER4_PO -+ || instruction->opcode == VKD3DSIH_GATHER4_PO_C; - - addr = &src[0]; - offset = extended_offset ? 
&src[1] : NULL; -@@ -8801,7 +8860,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler - type_id, resource_symbol->info.resource.structure_stride, - &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -8833,7 +8892,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler - type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - - texel_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -8876,7 +8935,7 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -- assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); -+ VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); - for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) -@@ -8939,7 +8998,7 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * - &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - - data = &src[instruction->src_count - 1]; -- assert(data->reg.data_type == VKD3D_DATA_UINT); -+ VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); - val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); - - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -8963,12 +9022,11 @@ static void 
spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * - { - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); -- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - - data = &src[instruction->src_count - 1]; -- assert(data->reg.data_type == VKD3D_DATA_UINT); -+ VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); - val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); - - component_count = vsir_write_mask_component_count(dst->write_mask); -@@ -9007,7 +9065,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); -- assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); - base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - -@@ -9145,12 +9202,12 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c - uint32_t operands[3]; - SpvOp op; - -- op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC -+ op = instruction->opcode == VKD3DSIH_IMM_ATOMIC_ALLOC - ? 
SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; - - resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); - counter_id = resource_symbol->info.resource.uav_counter_id; -- assert(counter_id); -+ VKD3D_ASSERT(counter_id); - - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - -@@ -9211,7 +9268,7 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins - { - static const struct - { -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - SpvOp spirv_op; - } - atomic_ops[] = -@@ -9240,16 +9297,16 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins - - for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) - { -- if (atomic_ops[i].handler_idx == instruction->handler_idx) -+ if (atomic_ops[i].opcode == instruction->opcode) - return atomic_ops[i].spirv_op; - } - - return SpvOpMax; - } - --static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) -+static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode opcode) - { -- return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; -+ return VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR; - } - - static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compiler, -@@ -9274,12 +9331,12 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - bool raw; - SpvOp op; - -- resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; -+ resource = is_imm_atomic_instruction(instruction->opcode) ? 
&dst[1] : &dst[0]; - - op = spirv_compiler_map_atomic_instruction(instruction); - if (op == SpvOpMax) - { -- ERR("Unexpected instruction %#x.\n", instruction->handler_idx); -+ ERR("Unexpected instruction %#x.\n", instruction->opcode); - return; - } - -@@ -9315,14 +9372,14 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); - if (structure_stride || raw) - { -- assert(!raw != !structure_stride); -+ VKD3D_ASSERT(!raw != !structure_stride); - coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, - type_id, structure_stride, &src[0], VKD3DSP_WRITEMASK_0, - &src[0], VKD3DSP_WRITEMASK_1); - } - else - { -- assert(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); -+ VKD3D_ASSERT(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); - coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], coordinate_mask); - } - -@@ -9360,7 +9417,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - { - WARN("Ignoring 'volatile' attribute.\n"); - spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, -- "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); -+ "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->opcode); - } - - memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) -@@ -9379,7 +9436,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil - result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, - op, type_id, operands, i); - -- if (is_imm_atomic_instruction(instruction->handler_idx)) -+ if (is_imm_atomic_instruction(instruction->opcode)) - spirv_compiler_emit_store_dst(compiler, dst, result_id); - } - -@@ -9511,8 +9568,8 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co - - if (src->reg.type == VKD3DSPR_RASTERIZER) - { -- val_id = 
spirv_compiler_emit_uint_shader_parameter(compiler, -- VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT); -+ val_id = spirv_compiler_emit_shader_parameter(compiler, -+ VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT); - } - else - { -@@ -9684,13 +9741,13 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, - - src_ids[src_count++] = register_info.id; - -- if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) -+ if (instruction->opcode == VKD3DSIH_EVAL_CENTROID) - { - op = GLSLstd450InterpolateAtCentroid; - } - else - { -- assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); -+ VKD3D_ASSERT(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); - op = GLSLstd450InterpolateAtSample; - src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); - } -@@ -9772,7 +9829,7 @@ static void spirv_compiler_emit_emit_stream(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int stream_idx; - -- if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) -+ if (instruction->opcode == VKD3DSIH_EMIT_STREAM) - stream_idx = instruction->src[0].reg.idx[0].offset; - else - stream_idx = 0; -@@ -9793,7 +9850,7 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - unsigned int stream_idx; - -- if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) -+ if (instruction->opcode == VKD3DSIH_CUT_STREAM) - stream_idx = instruction->src[0].reg.idx[0].offset; - else - stream_idx = 0; -@@ -9807,9 +9864,68 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, - vkd3d_spirv_build_op_end_primitive(builder); - } - --static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) -+static uint32_t map_quad_read_across_direction(enum vkd3d_shader_opcode opcode) -+{ -+ switch (opcode) -+ { -+ case VKD3DSIH_QUAD_READ_ACROSS_X: -+ return 0; -+ 
case VKD3DSIH_QUAD_READ_ACROSS_Y: -+ return 1; -+ case VKD3DSIH_QUAD_READ_ACROSS_D: -+ return 2; -+ default: -+ vkd3d_unreachable(); -+ } -+} -+ -+static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, direction_type_id, direction_id, val_id; -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VKD3D_DATA_UINT, 1); -+ val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); -+ direction_id = map_quad_read_across_direction(instruction->opcode); -+ direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_swap(builder, type_id, val_id, direction_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compiler, -+ const struct vkd3d_shader_instruction *instruction) -+{ -+ struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; -+ const struct vkd3d_shader_dst_param *dst = instruction->dst; -+ const struct vkd3d_shader_src_param *src = instruction->src; -+ uint32_t type_id, val_id, lane_id; -+ -+ if (!register_is_constant_or_undef(&src[1].reg)) -+ { -+ FIXME("Unsupported non-constant quad read lane index.\n"); -+ spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, -+ "Non-constant quad read lane indices are not supported."); -+ return; -+ } -+ -+ type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, -+ vsir_write_mask_component_count(dst->write_mask)); -+ val_id = 
spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); -+ lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); -+ val_id = vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); -+ -+ spirv_compiler_emit_store_dst(compiler, dst, val_id); -+} -+ -+static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode opcode) - { -- switch (handler_idx) -+ switch (opcode) - { - case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: - return SpvOpGroupNonUniformAllEqual; -@@ -9833,7 +9949,7 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, - - vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); - -- op = map_wave_bool_op(instruction->handler_idx); -+ op = map_wave_bool_op(instruction->opcode); - type_id = vkd3d_spirv_get_op_type_bool(builder); - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, -@@ -9865,9 +9981,9 @@ static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compil - spirv_compiler_emit_store_dst(compiler, dst, val_id); - } - --static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) -+static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode opcode, bool is_float) - { -- switch (handler_idx) -+ switch (opcode) - { - case VKD3DSIH_WAVE_ACTIVE_BIT_AND: - return SpvOpGroupNonUniformBitwiseAnd; -@@ -9905,7 +10021,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, - uint32_t type_id, val_id; - SpvOp op; - -- op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); -+ op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type)); - - type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, - vsir_write_mask_component_count(dst->write_mask)); -@@ -9928,7 +10044,7 @@ static void spirv_compiler_emit_wave_bit_count(struct 
spirv_compiler *compiler, - SpvGroupOperation group_op; - uint32_t type_id, val_id; - -- group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan -+ group_op = (instruction->opcode == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan - : SpvGroupOperationReduce; - - val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); -@@ -10014,7 +10130,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - - compiler->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_GLOBAL_FLAGS: - spirv_compiler_emit_dcl_global_flags(compiler, instruction); -@@ -10337,6 +10453,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_CUT_STREAM: - spirv_compiler_emit_cut_stream(compiler, instruction); - break; -+ case VKD3DSIH_QUAD_READ_ACROSS_D: -+ case VKD3DSIH_QUAD_READ_ACROSS_X: -+ case VKD3DSIH_QUAD_READ_ACROSS_Y: -+ spirv_compiler_emit_quad_read_across(compiler, instruction); -+ break; -+ case VKD3DSIH_QUAD_READ_LANE_AT: -+ spirv_compiler_emit_quad_read_lane_at(compiler, instruction); -+ break; - case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: - case VKD3DSIH_WAVE_ALL_TRUE: - case VKD3DSIH_WAVE_ANY_TRUE: -@@ -10371,7 +10495,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_WAVE_READ_LANE_FIRST: - spirv_compiler_emit_wave_read_lane_first(compiler, instruction); - break; -- case VKD3DSIH_DCL: - case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: - case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: - case VKD3DSIH_DCL_INPUT_SGV: -@@ -10381,7 +10504,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - case VKD3DSIH_DCL_OUTPUT_SIV: - case VKD3DSIH_DCL_RESOURCE_RAW: - case VKD3DSIH_DCL_RESOURCE_STRUCTURED: -- case VKD3DSIH_DCL_SAMPLER: - case VKD3DSIH_DCL_UAV_RAW: - case VKD3DSIH_DCL_UAV_STRUCTURED: - case 
VKD3DSIH_DCL_UAV_TYPED: -@@ -10390,9 +10512,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, - /* nothing to do */ - break; - default: -- FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); -+ FIXME("Unhandled instruction %#x.\n", instruction->opcode); - spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, -- "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); -+ "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); - break; - } - -@@ -10476,12 +10598,16 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - struct vkd3d_shader_instruction_array instructions; - enum vkd3d_shader_spirv_environment environment; - enum vkd3d_result result = VKD3D_OK; -- unsigned int i; -+ unsigned int i, max_element_count; - - if ((result = vsir_program_normalise(program, compiler->config_flags, - compile_info, compiler->message_context)) < 0) - return result; - -+ max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); -+ if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) -+ return VKD3D_ERROR_OUT_OF_MEMORY; -+ - if (program->temp_count) - spirv_compiler_emit_temps(compiler, program->temp_count); - if (program->ssa_count) -@@ -10489,9 +10615,38 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct - - spirv_compiler_emit_descriptor_declarations(compiler); - -+ compiler->spirv_parameter_info = vkd3d_calloc(program->parameter_count, sizeof(*compiler->spirv_parameter_info)); -+ for (i = 0; i < program->parameter_count; ++i) -+ { -+ const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; -+ -+ if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) -+ { -+ uint32_t type_id, struct_id, ptr_type_id, var_id; -+ -+ type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 
1); -+ -+ struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); -+ vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); -+ vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, -+ SpvDecorationOffset, parameter->u.buffer.offset); -+ -+ ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, struct_id); -+ var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, -+ ptr_type_id, SpvStorageClassUniform, 0); -+ -+ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationDescriptorSet, parameter->u.buffer.set); -+ vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationBinding, parameter->u.buffer.binding); -+ -+ compiler->spirv_parameter_info[i].buffer_id = var_id; -+ } -+ } -+ - if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) - return VKD3D_ERROR_OUT_OF_MEMORY; - -+ compiler->program = program; -+ - instructions = program->instructions; - memset(&program->instructions, 0, sizeof(program->instructions)); - -diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c -index b562e815a81..84f641cc316 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/tpf.c -+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c -@@ -22,6 +22,7 @@ - */ - - #include "hlsl.h" -+#include "vkd3d_shader_private.h" - - #define SM4_MAX_SRC_COUNT 6 - #define SM4_MAX_DST_COUNT 2 -@@ -780,7 +781,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) - { - FIXME("Ignoring shader data type %#x.\n", type); -- ins->handler_idx = VKD3DSIH_NOP; -+ ins->opcode = VKD3DSIH_NOP; - return; - } - -@@ -789,7 +790,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - if (icb_size % 4) - { - FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - -@@ 
-797,7 +798,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui - { - ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); - vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - icb->register_idx = 0; -@@ -1716,7 +1717,7 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( - const struct vkd3d_sm4_register_type_info *register_type_info = - get_info_from_vkd3d_register_type(lookup, vkd3d_type); - -- assert(register_type_info); -+ VKD3D_ASSERT(register_type_info); - return register_type_info->default_src_swizzle_type; - } - -@@ -2395,16 +2396,16 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(opcode_info = get_info_from_sm4_opcode(&sm4->lookup, opcode))) - { - FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - *ptr += len; - return; - } - - vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); -- if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE -- || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) -- sm4->phase = ins->handler_idx; -- sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; -+ if (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->opcode == VKD3DSIH_HS_FORK_PHASE -+ || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) -+ sm4->phase = ins->opcode; -+ sm4->has_control_point_phase |= ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE; - ins->flags = 0; - ins->coissue = false; - ins->raw = false; -@@ -2417,7 +2418,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - { - ERR("Failed to allocate src parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, 
"Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; -@@ -2459,7 +2460,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - { - ERR("Failed to allocate dst parameters.\n"); - vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - for (i = 0; i < ins->dst_count; ++i) -@@ -2467,7 +2468,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), - &dst_params[i]))) - { -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - dst_params[i].modifiers |= instruction_dst_modifier; -@@ -2478,7 +2479,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), - &src_params[i]))) - { -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - } -@@ -2488,12 +2489,12 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str - - fail: - *ptr = sm4->end; -- ins->handler_idx = VKD3DSIH_INVALID; -+ ins->opcode = VKD3DSIH_INVALID; - return; - } - - static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, -- const uint32_t *byte_code, size_t byte_code_size, const char *source_name, -+ const uint32_t *byte_code, size_t byte_code_size, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) - { - struct vkd3d_shader_version version; -@@ -2552,9 +2553,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro - version.minor = VKD3D_SM4_VERSION_MINOR(version_token); - - /* Estimate 
instruction count to avoid reallocation in most shaders. */ -- if (!vsir_program_init(program, &version, token_count / 7u + 20)) -+ if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) - return false; -- vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); -+ vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); - sm4->ptr = sm4->start; - - init_sm4_lookup_tables(&sm4->lookup); -@@ -2651,7 +2652,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - } - - if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, -- compile_info->source_name, message_context)) -+ compile_info, message_context)) - { - WARN("Failed to initialise shader parser.\n"); - free_dxbc_shader_desc(&dxbc_desc); -@@ -2693,7 +2694,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con - ins = &instructions->elements[instructions->count]; - shader_sm4_read_instruction(&sm4, ins); - -- if (ins->handler_idx == VKD3DSIH_INVALID) -+ if (ins->opcode == VKD3DSIH_INVALID) - { - WARN("Encountered unrecognized or invalid instruction.\n"); - vsir_program_cleanup(program); -@@ -2762,6 +2763,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, -+ {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, - }; - - for (i = 0; i < ARRAY_SIZE(register_table); ++i) -@@ -2817,6 +2819,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, -+ {"sv_coverage", true, 
VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, - - {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, - {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, -@@ -2885,7 +2888,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - continue; - - ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); -- assert(ret); -+ VKD3D_ASSERT(ret); - if (usage == ~0u) - continue; - usage_idx = var->semantic.index; -@@ -2896,7 +2899,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, - } - else - { -- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; - } - -@@ -2973,7 +2976,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - switch (type->class) - { - case HLSL_CLASS_MATRIX: -- assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); -+ VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) - return D3D_SVC_MATRIX_COLUMNS; - else -@@ -2984,11 +2987,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - return D3D_SVC_VECTOR; - - case HLSL_CLASS_ARRAY: -+ case HLSL_CLASS_DEPTH_STENCIL_STATE: - case HLSL_CLASS_DEPTH_STENCIL_VIEW: - case HLSL_CLASS_EFFECT_GROUP: - case HLSL_CLASS_STRUCT: - case HLSL_CLASS_PASS: - case HLSL_CLASS_PIXEL_SHADER: -+ case HLSL_CLASS_RASTERIZER_STATE: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_SAMPLER: - case HLSL_CLASS_STRING: -@@ -2997,6 +3002,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) - case HLSL_CLASS_UAV: - case HLSL_CLASS_VERTEX_SHADER: - case HLSL_CLASS_VOID: -+ case HLSL_CLASS_CONSTANT_BUFFER: -+ case HLSL_CLASS_COMPUTE_SHADER: -+ case HLSL_CLASS_DOMAIN_SHADER: -+ case HLSL_CLASS_HULL_SHADER: -+ case HLSL_CLASS_GEOMETRY_SHADER: -+ case HLSL_CLASS_BLEND_STATE: -+ case 
HLSL_CLASS_NULL: - break; - } - vkd3d_unreachable(); -@@ -3077,7 +3089,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b - } - else - { -- assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); -+ VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); - put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); - put_u32(buffer, vkd3d_make_u32(array_size, 0)); -@@ -3098,8 +3110,6 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) - { - switch (type->class) - { -- case HLSL_CLASS_ARRAY: -- return sm4_resource_type(type->e.array.type); - case HLSL_CLASS_SAMPLER: - return D3D_SIT_SAMPLER; - case HLSL_CLASS_TEXTURE: -@@ -3115,9 +3125,6 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) - - static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) - { -- if (type->class == HLSL_CLASS_ARRAY) -- return sm4_resource_format(type->e.array.type); -- - switch (type->e.resource.format->e.numeric.type) - { - case HLSL_TYPE_DOUBLE: -@@ -3142,9 +3149,6 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type - - static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) - { -- if (type->class == HLSL_CLASS_ARRAY) -- return sm4_rdef_resource_dimension(type->e.array.type); -- - switch (type->sampler_dim) - { - case HLSL_SAMPLER_DIM_1D: -@@ -3178,13 +3182,21 @@ struct extern_resource - /* var is only not NULL if this resource is a whole variable, so it may be responsible for more - * than one component. */ - const struct hlsl_ir_var *var; -+ const struct hlsl_buffer *buffer; - - char *name; -- struct hlsl_type *data_type; - bool is_user_packed; - -+ /* The data type of a single component of the resource. 
-+ * This might be different from the data type of the resource itself in 4.0 -+ * profiles, where an array (or multi-dimensional array) is handled as a -+ * single resource, unlike in 5.0. */ -+ struct hlsl_type *component_type; -+ - enum hlsl_regset regset; -- unsigned int id, bind_count; -+ unsigned int id, space, index, bind_count; -+ -+ struct vkd3d_shader_location loc; - }; - - static int sm4_compare_extern_resources(const void *a, const void *b) -@@ -3196,7 +3208,10 @@ static int sm4_compare_extern_resources(const void *a, const void *b) - if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) - return r; - -- return vkd3d_u32_compare(aa->id, bb->id); -+ if ((r = vkd3d_u32_compare(aa->space, bb->space))) -+ return r; -+ -+ return vkd3d_u32_compare(aa->index, bb->index); - } - - static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) -@@ -3220,6 +3235,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; - struct extern_resource *extern_resources = NULL; - const struct hlsl_ir_var *var; -+ struct hlsl_buffer *buffer; - enum hlsl_regset regset; - size_t capacity = 0; - char *name; -@@ -3272,14 +3288,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - hlsl_release_string_buffer(ctx, name_buffer); - - extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; -- extern_resources[*count].data_type = component_type; - extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; - -+ extern_resources[*count].component_type = component_type; -+ - extern_resources[*count].regset = regset; -- extern_resources[*count].id = var->regs[regset].id + regset_offset; -+ extern_resources[*count].id = var->regs[regset].id; -+ extern_resources[*count].space = var->regs[regset].space; -+ 
extern_resources[*count].index = var->regs[regset].index + regset_offset; - extern_resources[*count].bind_count = 1; -+ extern_resources[*count].loc = var->loc; - - ++*count; - } -@@ -3313,28 +3334,75 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un - } - - extern_resources[*count].var = var; -+ extern_resources[*count].buffer = NULL; - - extern_resources[*count].name = name; -- extern_resources[*count].data_type = var->data_type; -- extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; -+ /* For some reason 5.1 resources aren't marked as -+ * user-packed, but cbuffers still are. */ -+ extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) -+ && !!var->reg_reservation.reg_type; -+ -+ extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); - - extern_resources[*count].regset = r; - extern_resources[*count].id = var->regs[r].id; -+ extern_resources[*count].space = var->regs[r].space; -+ extern_resources[*count].index = var->regs[r].index; - extern_resources[*count].bind_count = var->bind_count[r]; -+ extern_resources[*count].loc = var->loc; - - ++*count; - } - } - } - -+ LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) -+ { -+ if (!buffer->reg.allocated) -+ continue; -+ -+ if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, -+ sizeof(*extern_resources)))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ if (!(name = hlsl_strdup(ctx, buffer->name))) -+ { -+ sm4_free_extern_resources(extern_resources, *count); -+ *count = 0; -+ return NULL; -+ } -+ -+ extern_resources[*count].var = NULL; -+ extern_resources[*count].buffer = buffer; -+ -+ extern_resources[*count].name = name; -+ extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; -+ -+ extern_resources[*count].component_type = NULL; -+ -+ extern_resources[*count].regset = 
HLSL_REGSET_NUMERIC; -+ extern_resources[*count].id = buffer->reg.id; -+ extern_resources[*count].space = buffer->reg.space; -+ extern_resources[*count].index = buffer->reg.index; -+ extern_resources[*count].bind_count = 1; -+ extern_resources[*count].loc = buffer->loc; -+ -+ ++*count; -+ } -+ - qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); - return extern_resources; - } - - static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - { -- unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; -+ uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); - size_t cbuffers_offset, resources_offset, creator_offset, string_offset; -+ unsigned int cbuffer_count = 0, extern_resources_count, i, j; - size_t cbuffer_position, resource_position, creator_position; - const struct hlsl_profile_info *profile = ctx->profile; - struct vkd3d_bytecode_buffer buffer = {0}; -@@ -3354,19 +3422,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - -- resource_count += extern_resources_count; - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- { - ++cbuffer_count; -- ++resource_count; -- } - } - - put_u32(&buffer, cbuffer_count); - cbuffer_position = put_u32(&buffer, 0); -- put_u32(&buffer, resource_count); -+ put_u32(&buffer, extern_resources_count); - resource_position = put_u32(&buffer, 0); - put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), - target_types[profile->type])); -@@ -3378,7 +3442,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? 
TAG_RD11_REVERSE : TAG_RD11); - put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ - put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ -- put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t)); /* size of binding desc */ -+ put_u32(&buffer, binding_desc_size); /* size of binding desc */ - put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ - put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ - put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ -@@ -3395,55 +3459,38 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - uint32_t flags = 0; - -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); -- - if (resource->is_user_packed) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, sm4_resource_type(resource->data_type)); -- if (resource->regset == HLSL_REGSET_SAMPLERS) -- { -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- put_u32(&buffer, 0); -- } -+ if (resource->buffer) -+ put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - else -+ put_u32(&buffer, sm4_resource_type(resource->component_type)); -+ if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) - { -- unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; -+ unsigned int dimx = resource->component_type->e.resource.format->dimx; - -- put_u32(&buffer, sm4_resource_format(resource->data_type)); -- put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); -+ put_u32(&buffer, sm4_resource_format(resource->component_type)); -+ put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); - put_u32(&buffer, ~0u); /* FIXME: multisample count */ - flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; - } -- put_u32(&buffer, resource->id); -+ else -+ { -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ put_u32(&buffer, 0); -+ } -+ put_u32(&buffer, resource->index); - put_u32(&buffer, resource->bind_count); - put_u32(&buffer, flags); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- uint32_t flags = 0; -- -- if (!cbuffer->reg.allocated) -- continue; - - if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); -- -- if (cbuffer->reservation.reg_type) -- flags |= D3D_SIF_USERPACKED; -- -- put_u32(&buffer, 0); /* name */ -- put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? 
D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); -- put_u32(&buffer, 0); /* return type */ -- put_u32(&buffer, 0); /* dimension */ -- put_u32(&buffer, 0); /* multisample count */ -- put_u32(&buffer, cbuffer->reg.id); /* bind point */ -- put_u32(&buffer, 1); /* bind count */ -- put_u32(&buffer, flags); /* flags */ -+ { -+ put_u32(&buffer, resource->space); -+ put_u32(&buffer, resource->id); -+ } - } - - for (i = 0; i < extern_resources_count; ++i) -@@ -3451,16 +3498,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - const struct extern_resource *resource = &extern_resources[i]; - - string_offset = put_string(&buffer, resource->name); -- set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); -- } -- -- LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) -- { -- if (!cbuffer->reg.allocated) -- continue; -- -- string_offset = put_string(&buffer, cbuffer->name); -- set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); -+ set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); - } - - /* Buffers. 
*/ -@@ -3522,7 +3560,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); - put_u32(&buffer, flags); - put_u32(&buffer, 0); /* type */ -- put_u32(&buffer, 0); /* FIXME: default value */ -+ put_u32(&buffer, 0); /* default value */ - - if (profile->major_version >= 5) - { -@@ -3546,6 +3584,41 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - set_u32(&buffer, var_offset, string_offset); - write_sm4_type(ctx, &buffer, var->data_type); - set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); -+ -+ if (var->default_values) -+ { -+ unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; -+ unsigned int comp_count = hlsl_type_component_count(var->data_type); -+ unsigned int default_value_offset; -+ unsigned int k; -+ -+ default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); -+ set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); -+ -+ for (k = 0; k < comp_count; ++k) -+ { -+ struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); -+ unsigned int comp_offset; -+ enum hlsl_regset regset; -+ -+ if (comp_type->class == HLSL_CLASS_STRING) -+ { -+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Cannot write string default value."); -+ continue; -+ } -+ -+ comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); -+ if (regset == HLSL_REGSET_NUMERIC) -+ { -+ if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) -+ hlsl_fixme(ctx, &var->loc, "Write double default values."); -+ -+ set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), -+ var->default_values[k].number.u); -+ } -+ } -+ } - ++j; - } - } -@@ -3611,9 +3684,9 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod - switch (imod->type) - { - case VKD3D_SM4_MODIFIER_AOFFIMMI: -- 
assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); -- assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); -- assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); -+ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); -+ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); -+ VKD3D_ASSERT(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); - word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; - word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; -@@ -3652,7 +3725,7 @@ struct sm4_instruction - static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, - const struct hlsl_ir_node *instr) - { -- assert(instr->reg.allocated); -+ VKD3D_ASSERT(instr->reg.allocated); - reg->type = VKD3DSPR_TEMP; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = instr->reg.id; -@@ -3671,7 +3744,7 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s - reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; - reg->dimension = VSIR_DIMENSION_VEC4; - -- assert(var->regs[HLSL_REGSET_NUMERIC].allocated); -+ VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); - - if (!var->indexable) - { -@@ -3690,13 +3763,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s - struct vkd3d_shader_src_param *idx_src; - unsigned int idx_writemask; - -- assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); -+ VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); - idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; - memset(idx_src, 0, sizeof(*idx_src)); - - reg->idx[1].rel_addr = idx_src; - sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); -- assert(idx_writemask != 0); -+ VKD3D_ASSERT(idx_writemask != 0); - idx_src->swizzle = 
swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); - } - } -@@ -3720,42 +3793,79 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - reg->type = VKD3DSPR_RESOURCE; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -- assert(regset == HLSL_REGSET_TEXTURES); -- reg->idx_count = 1; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_UAVS) - { - reg->type = VKD3DSPR_UAV; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -- assert(regset == HLSL_REGSET_UAVS); -- reg->idx_count = 1; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else if (regset == HLSL_REGSET_SAMPLERS) - { - reg->type = VKD3DSPR_SAMPLER; - reg->dimension = VSIR_DIMENSION_NONE; -- reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -- reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -- assert(regset == HLSL_REGSET_SAMPLERS); -- reg->idx_count = 1; -+ if 
(hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; -+ reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ -+ reg->idx_count = 2; -+ } -+ else -+ { -+ reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; -+ reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); -+ reg->idx_count = 1; -+ } -+ VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); - *writemask = VKD3DSP_WRITEMASK_ALL; - } - else - { - unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - -- assert(data_type->class <= HLSL_CLASS_VECTOR); -+ VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); - reg->type = VKD3DSPR_CONSTBUFFER; - reg->dimension = VSIR_DIMENSION_VEC4; -- reg->idx[0].offset = var->buffer->reg.id; -- reg->idx[1].offset = offset / 4; -- reg->idx_count = 2; -+ if (hlsl_version_ge(ctx, 5, 1)) -+ { -+ reg->idx[0].offset = var->buffer->reg.id; -+ reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ -+ reg->idx[2].offset = offset / 4; -+ reg->idx_count = 3; -+ } -+ else -+ { -+ reg->idx[0].offset = var->buffer->reg.index; -+ reg->idx[1].offset = offset / 4; -+ reg->idx_count = 2; -+ } - *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); - } - } -@@ -3780,7 +3890,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - -- assert(hlsl_reg.allocated); -+ VKD3D_ASSERT(hlsl_reg.allocated); - reg->type = VKD3DSPR_INPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; -@@ -3812,7 +3922,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re - { - struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - -- assert(hlsl_reg.allocated); -+ VKD3D_ASSERT(hlsl_reg.allocated); - reg->type = VKD3DSPR_OUTPUT; - reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = hlsl_reg.id; -@@ -3948,7 +4058,7 @@ static 
uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v - switch (sm4_swizzle_type) - { - case VKD3D_SM4_SWIZZLE_NONE: -- assert(sm4_swizzle || register_is_constant(reg)); -+ VKD3D_ASSERT(sm4_swizzle || register_is_constant(reg)); - token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & VKD3D_SM4_WRITEMASK_MASK; - break; - -@@ -3980,16 +4090,16 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct - const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; - uint32_t idx_src_token; - -- assert(idx_src); -- assert(!idx_src->modifiers); -- assert(idx_src->reg.type != VKD3DSPR_IMMCONST); -+ VKD3D_ASSERT(idx_src); -+ VKD3D_ASSERT(!idx_src->modifiers); -+ VKD3D_ASSERT(idx_src->reg.type != VKD3DSPR_IMMCONST); - idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle); - - put_u32(buffer, idx_src_token); - for (k = 0; k < idx_src->reg.idx_count; ++k) - { - put_u32(buffer, idx_src->reg.idx[k].offset); -- assert(!idx_src->reg.idx[k].rel_addr); -+ VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr); - } - } - else -@@ -4139,47 +4249,76 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, - - static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) - { -- const struct sm4_instruction instr = -+ size_t size = (cbuffer->used_size + 3) / 4; -+ -+ struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, - - .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, - .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, -- .srcs[0].reg.idx[0].offset = cbuffer->reg.id, -- .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, -- .srcs[0].reg.idx_count = 2, - .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, - .src_count = 1, - }; -+ -+ if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ { -+ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; -+ instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; -+ instr.srcs[0].reg.idx[2].offset 
= cbuffer->reg.index; /* FIXME: array end */ -+ instr.srcs[0].reg.idx_count = 3; -+ -+ instr.idx[0] = size; -+ instr.idx[1] = cbuffer->reg.space; -+ instr.idx_count = 2; -+ } -+ else -+ { -+ instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; -+ instr.srcs[0].reg.idx[1].offset = size; -+ instr.srcs[0].reg.idx_count = 2; -+ } -+ - write_sm4_instruction(tpf, &instr); - } - - static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) - { -- struct hlsl_type *component_type; - unsigned int i; - struct sm4_instruction instr = - { - .opcode = VKD3D_SM4_OP_DCL_SAMPLER, - - .dsts[0].reg.type = VKD3DSPR_SAMPLER, -- .dsts[0].reg.idx_count = 1, - .dst_count = 1, - }; - -- component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); -+ VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); - -- if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) -+ if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) - instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; - -- assert(resource->regset == HLSL_REGSET_SAMPLERS); -- - for (i = 0; i < resource->bind_count; ++i) - { - if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) - continue; - -- instr.dsts[0].reg.idx[0].offset = resource->id + i; -+ if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ { -+ VKD3D_ASSERT(!i); -+ instr.dsts[0].reg.idx[0].offset = resource->id; -+ instr.dsts[0].reg.idx[1].offset = resource->index; -+ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[0] = resource->space; -+ instr.idx_count = 1; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = resource->index + i; -+ instr.dsts[0].reg.idx_count = 1; -+ } - write_sm4_instruction(tpf, &instr); - } - } -@@ -4190,11 +4329,12 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; - struct hlsl_type *component_type; - struct sm4_instruction instr; -+ bool multisampled; - unsigned int i; - -- assert(resource->regset == regset); -+ VKD3D_ASSERT(resource->regset == regset); - -- component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); -+ component_type = resource->component_type; - - for (i = 0; i < resource->bind_count; ++i) - { -@@ -4212,20 +4352,47 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - .idx_count = 1, - }; - -+ multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -+ || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; -+ -+ if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) -+ { -+ hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, -+ "Multisampled texture object declaration needs sample count for profile %s.", -+ tpf->ctx->profile->name); -+ } -+ -+ if (hlsl_version_ge(tpf->ctx, 5, 1)) -+ { -+ VKD3D_ASSERT(!i); -+ instr.dsts[0].reg.idx[0].offset = resource->id; -+ instr.dsts[0].reg.idx[1].offset = resource->index; -+ instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ -+ instr.dsts[0].reg.idx_count = 3; -+ -+ instr.idx[1] = resource->space; -+ instr.idx_count = 2; -+ } -+ else -+ { -+ instr.dsts[0].reg.idx[0].offset = resource->index + i; -+ instr.dsts[0].reg.idx_count = 1; -+ } -+ - if (uav) - { -- switch (resource->data_type->sampler_dim) -+ switch (component_type->sampler_dim) - { -- case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -- instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; -- instr.byte_stride = resource->data_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; -- break; -- default: -- instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; -- break; -+ case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: -+ instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; -+ instr.byte_stride = 
component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; -+ break; -+ default: -+ instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; -+ break; - } - -- if (resource->data_type->e.resource.rasteriser_ordered) -+ if (component_type->e.resource.rasteriser_ordered) - instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT; - } - else -@@ -4234,11 +4401,8 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex - } - instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); - -- if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS -- || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) -- { -+ if (multisampled) - instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; -- } - - write_sm4_instruction(tpf, &instr); - } -@@ -4449,7 +4613,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - -- assert(dst_idx < ARRAY_SIZE(instr.dsts)); -+ VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -@@ -4508,7 +4672,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t - memset(&instr, 0, sizeof(instr)); - instr.opcode = opcode; - -- assert(dst_idx < ARRAY_SIZE(instr.dsts)); -+ VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); - sm4_dst_from_node(&instr.dsts[dst_idx], dst); - instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; - instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; -@@ -4706,7 +4870,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl - const struct hlsl_ir_node *dst = &load->node; - struct sm4_instruction instr; - -- assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == 
HLSL_TYPE_FLOAT); -+ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -@@ -4735,7 +4899,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir - return; - } - -- assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); -+ VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); - - memset(&instr, 0, sizeof(instr)); - instr.opcode = VKD3D_SM4_OP_RESINFO; -@@ -4789,7 +4953,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex - const struct hlsl_type *src_type = arg1->data_type; - - /* Narrowing casts were already lowered. */ -- assert(src_type->dimx == dst_type->dimx); -+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx); - - switch (dst_type->e.numeric.type) - { -@@ -4904,6 +5068,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct - write_sm4_instruction(tpf, &instr); - } - -+static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) -+{ -+ struct sm4_instruction instr; -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; -+ instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; -+ -+ sm4_dst_from_node(&instr.dsts[0], dst); -+ instr.dst_count = 1; -+ -+ instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; -+ instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; -+ instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); -+ instr.src_count = 1; -+ -+ write_sm4_instruction(tpf, &instr); -+} -+ - static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) - { - const struct hlsl_ir_node *arg1 = expr->operands[0].node; -@@ -4912,13 +5095,21 @@ static void write_sm4_expr(const struct 
tpf_writer *tpf, const struct hlsl_ir_ex - const struct hlsl_type *dst_type = expr->node.data_type; - struct vkd3d_string_buffer *dst_type_string; - -- assert(expr->node.reg.allocated); -+ VKD3D_ASSERT(expr->node.reg.allocated); - - if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) - return; - - switch (expr->op) - { -+ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: -+ if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) -+ write_sm4_rasterizer_sample_count(tpf, &expr->node); -+ else -+ hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, -+ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); -+ break; -+ - case HLSL_OP1_ABS: - switch (dst_type->e.numeric.type) - { -@@ -4932,7 +5123,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP1_BIT_NOT: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - -@@ -4941,67 +5132,73 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP1_CEIL: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_COS: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); - break; - - case HLSL_OP1_DSX: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSX_COARSE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); - break; - - case 
HLSL_OP1_DSX_FINE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_COARSE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_DSY_FINE: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_EXP2: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); - break; - -+ case HLSL_OP1_F16TOF32: -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); -+ break; -+ - case HLSL_OP1_FLOOR: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); - break; - - case HLSL_OP1_FRACT: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOG2: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); - break; - - case HLSL_OP1_LOGIC_NOT: -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); - break; - -@@ -5022,39 +5219,77 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct 
hlsl_ir_ex - } - break; - -+ case HLSL_OP1_RCP: -+ switch (dst_type->e.numeric.type) -+ { -+ case HLSL_TYPE_FLOAT: -+ /* SM5 comes with a RCP opcode */ -+ if (tpf->ctx->profile->major_version >= 5) -+ { -+ write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); -+ } -+ else -+ { -+ /* For SM4, implement as DIV dst, 1.0, src */ -+ struct sm4_instruction instr; -+ struct hlsl_constant_value one; -+ -+ VKD3D_ASSERT(type_is_float(dst_type)); -+ -+ memset(&instr, 0, sizeof(instr)); -+ instr.opcode = VKD3D_SM4_OP_DIV; -+ -+ sm4_dst_from_node(&instr.dsts[0], &expr->node); -+ instr.dst_count = 1; -+ -+ for (unsigned int i = 0; i < 4; i++) -+ one.u[i].f = 1.0f; -+ sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); -+ sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); -+ instr.src_count = 2; -+ -+ write_sm4_instruction(tpf, &instr); -+ } -+ break; -+ -+ default: -+ hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); -+ } -+ break; -+ - case HLSL_OP1_REINTERPRET: - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); - break; - - case HLSL_OP1_ROUND: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); - break; - - case HLSL_OP1_RSQ: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); - break; - - case HLSL_OP1_SAT: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV - | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), - &expr->node, arg1, 0); - break; - - case HLSL_OP1_SIN: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); - break; - - case HLSL_OP1_SQRT: 
-- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); - break; - - case HLSL_OP1_TRUNC: -- assert(type_is_float(dst_type)); -+ VKD3D_ASSERT(type_is_float(dst_type)); - write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); - break; - -@@ -5076,17 +5311,17 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - break; - - case HLSL_OP2_BIT_AND: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_OR: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_BIT_XOR: -- assert(type_is_integer(dst_type)); -+ VKD3D_ASSERT(type_is_integer(dst_type)); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); - break; - -@@ -5139,7 +5374,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ -5165,7 +5400,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ -5194,7 +5429,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ 
-5220,18 +5455,18 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - - case HLSL_OP2_LOGIC_AND: -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LOGIC_OR: -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); - break; - - case HLSL_OP2_LSHIFT: -- assert(type_is_integer(dst_type)); -- assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); - break; - -@@ -5310,7 +5545,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - { - const struct hlsl_type *src_type = arg1->data_type; - -- assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - - switch (src_type->e.numeric.type) - { -@@ -5333,8 +5568,8 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex - } - - case HLSL_OP2_RSHIFT: -- assert(type_is_integer(dst_type)); -- assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); -+ VKD3D_ASSERT(type_is_integer(dst_type)); -+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); - write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? 
VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, - &expr->node, arg1, arg2); - break; -@@ -5358,7 +5593,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * - .src_count = 1, - }; - -- assert(iff->condition.node->data_type->dimx == 1); -+ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); - - sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); - write_sm4_instruction(tpf, &instr); -@@ -5436,7 +5671,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo - sm4_dst_from_node(&instr.dsts[0], &load->node); - instr.dst_count = 1; - -- assert(hlsl_is_numeric_type(type)); -+ VKD3D_ASSERT(hlsl_is_numeric_type(type)); - if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) - { - struct hlsl_constant_value value; -@@ -5553,7 +5788,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h - case HLSL_RESOURCE_SAMPLE_LOD_BIAS: - case HLSL_RESOURCE_SAMPLE_GRAD: - /* Combined sample expressions were lowered. 
*/ -- assert(load->sampler.var); -+ VKD3D_ASSERT(load->sampler.var); - write_sm4_sample(tpf, load); - break; - -@@ -5706,7 +5941,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc - - if (!instr->reg.allocated) - { -- assert(instr->type == HLSL_IR_CONSTANT); -+ VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); - continue; - } - } -@@ -5799,21 +6034,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (cbuffer->reg.allocated) -- { -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); -- - write_sm4_dcl_constant_buffer(&tpf, cbuffer); -- } - } - - for (i = 0; i < extern_resources_count; ++i) - { - const struct extern_resource *resource = &extern_resources[i]; - -- if (hlsl_version_ge(ctx, 5, 1)) -- hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); -- - if (resource->regset == HLSL_REGSET_SAMPLERS) - write_sm4_dcl_samplers(&tpf, resource); - else if (resource->regset == HLSL_REGSET_TEXTURES) -@@ -5875,7 +6102,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) - extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - for (unsigned int i = 0; i < extern_resources_count; ++i) - { -- if (extern_resources[i].data_type->e.resource.rasteriser_ordered) -+ if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) - *flags |= VKD3D_SM4_REQUIRES_ROVS; - } - sm4_free_extern_resources(extern_resources, extern_resources_count); -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -index 14a3fa778e5..3c1ffcdbee3 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c -@@ -23,6 +23,8 @@ - #include - #include - -+/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ -+ 
- static inline int char_to_int(char c) - { - if ('0' <= c && c <= '9') -@@ -60,7 +62,7 @@ void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer) - buffer->buffer_size = 16; - buffer->content_size = 0; - buffer->buffer = vkd3d_malloc(buffer->buffer_size); -- assert(buffer->buffer); -+ VKD3D_ASSERT(buffer->buffer); - memset(buffer->buffer, 0, buffer->buffer_size); - } - -@@ -228,7 +230,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct - { - if (!buffer) - return; -- assert(cache->count + 1 <= cache->max_count); -+ VKD3D_ASSERT(cache->count + 1 <= cache->max_count); - cache->buffers[cache->count++] = buffer; - } - -@@ -429,7 +431,7 @@ static void bytecode_set_bytes(struct vkd3d_bytecode_buffer *buffer, size_t offs - if (buffer->status) - return; - -- assert(vkd3d_bound_range(offset, size, buffer->size)); -+ VKD3D_ASSERT(vkd3d_bound_range(offset, size, buffer->size)); - memcpy(buffer->data + offset, value, size); - } - -@@ -642,7 +644,7 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig - signature->element_count = src->element_count; - if (!src->elements) - { -- assert(!signature->element_count); -+ VKD3D_ASSERT(!signature->element_count); - signature->elements = NULL; - return true; - } -@@ -787,7 +789,7 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_push_cf_info(struct vkd3d_ - - static void vkd3d_shader_scan_pop_cf_info(struct vkd3d_shader_scan_context *context) - { -- assert(context->cf_info_count); -+ VKD3D_ASSERT(context->cf_info_count); - - --context->cf_info_count; - } -@@ -847,12 +849,13 @@ static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_contex - - static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) -- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= 
handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) -- || handler_idx == VKD3DSIH_LD_UAV_TYPED -- || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) -- || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) -+ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR) -+ || opcode == VKD3DSIH_LD_UAV_TYPED -+ || (opcode == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) -+ || (opcode == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); - } - - static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, -@@ -863,9 +866,9 @@ static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context * - - static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC -- || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return opcode == VKD3DSIH_IMM_ATOMIC_ALLOC || opcode == VKD3DSIH_IMM_ATOMIC_CONSUME; - } - - static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, -@@ -876,9 +879,10 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_contex - - static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) - { -- enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; -- return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) -- || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR); -+ enum vkd3d_shader_opcode opcode = instruction->opcode; -+ -+ return (VKD3DSIH_ATOMIC_AND <= opcode 
&& opcode <= VKD3DSIH_ATOMIC_XOR) -+ || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR); - } - - static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, -@@ -1130,7 +1134,7 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte - - context->location = instruction->location; - -- switch (instruction->handler_idx) -+ switch (instruction->opcode) - { - case VKD3DSIH_DCL_CONSTANT_BUFFER: - vkd3d_shader_scan_constant_buffer_declaration(context, instruction); -@@ -2063,7 +2067,7 @@ bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *ins - bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *instructions, - unsigned int idx, unsigned int count) - { -- assert(idx <= instructions->count); -+ VKD3D_ASSERT(idx <= instructions->count); - - if (!shader_instruction_array_reserve(instructions, instructions->count + count)) - return false; -diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -index 29b8d6ad022..ef66a8ca07a 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h -@@ -51,7 +51,6 @@ - #include "vkd3d_shader.h" - #include "wine/list.h" - --#include - #include - #include - #include -@@ -151,6 +150,8 @@ enum vkd3d_shader_error - VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, - VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, - VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, -+ VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, -+ VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, - - VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, - VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, -@@ -455,6 +456,10 @@ enum vkd3d_shader_opcode - VKD3DSIH_PHASE, - VKD3DSIH_PHI, - VKD3DSIH_POW, -+ VKD3DSIH_QUAD_READ_ACROSS_D, -+ VKD3DSIH_QUAD_READ_ACROSS_X, -+ 
VKD3DSIH_QUAD_READ_ACROSS_Y, -+ VKD3DSIH_QUAD_READ_LANE_AT, - VKD3DSIH_RCP, - VKD3DSIH_REP, - VKD3DSIH_RESINFO, -@@ -613,6 +618,7 @@ enum vkd3d_shader_register_type - VKD3DSPR_SSA, - VKD3DSPR_WAVELANECOUNT, - VKD3DSPR_WAVELANEINDEX, -+ VKD3DSPR_PARAMETER, - - VKD3DSPR_COUNT, - -@@ -805,6 +811,7 @@ enum vkd3d_tessellator_domain - - #define VKD3DSI_NONE 0x0 - #define VKD3DSI_TEXLD_PROJECT 0x1 -+#define VKD3DSI_TEXLD_BIAS 0x2 - #define VKD3DSI_INDEXED_DYNAMIC 0x4 - #define VKD3DSI_RESINFO_RCP_FLOAT 0x1 - #define VKD3DSI_RESINFO_UINT 0x2 -@@ -1189,7 +1196,7 @@ struct vkd3d_shader_location - struct vkd3d_shader_instruction - { - struct vkd3d_shader_location location; -- enum vkd3d_shader_opcode handler_idx; -+ enum vkd3d_shader_opcode opcode; - uint32_t flags; - unsigned int dst_count; - unsigned int src_count; -@@ -1238,8 +1245,8 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns - return v->major < major || (v->major == major && v->minor <= minor); - } - --void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, -- enum vkd3d_shader_opcode handler_idx); -+void vsir_instruction_init(struct vkd3d_shader_instruction *ins, -+ const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode); - - static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) - { -@@ -1303,14 +1310,14 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, - static inline struct vkd3d_shader_src_param *shader_src_param_allocator_get( - struct vkd3d_shader_param_allocator *allocator, unsigned int count) - { -- assert(allocator->stride == sizeof(struct vkd3d_shader_src_param)); -+ VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_src_param)); - return shader_param_allocator_get(allocator, count); - } - - static inline struct vkd3d_shader_dst_param *shader_dst_param_allocator_get( - struct 
vkd3d_shader_param_allocator *allocator, unsigned int count) - { -- assert(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); -+ VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); - return shader_param_allocator_get(allocator, count); - } - -@@ -1355,6 +1362,10 @@ struct vsir_program - struct shader_signature output_signature; - struct shader_signature patch_constant_signature; - -+ unsigned int parameter_count; -+ const struct vkd3d_shader_parameter1 *parameters; -+ bool free_parameters; -+ - unsigned int input_control_point_count, output_control_point_count; - unsigned int flat_constant_count[3]; - unsigned int block_count; -@@ -1370,7 +1381,10 @@ void vsir_program_cleanup(struct vsir_program *program); - int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, - struct vkd3d_shader_message_context *message_context); --bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); -+const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( -+ const struct vsir_program *program, enum vkd3d_shader_parameter_name name); -+bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, -+ const struct vkd3d_shader_version *version, unsigned int reserve); - enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, - const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); - enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, -@@ -1663,7 +1677,7 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask - { - unsigned int i; - -- assert(write_mask); -+ VKD3D_ASSERT(write_mask); - for (i = 0; i < VKD3D_VEC4_SIZE; ++i) - { - if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) -@@ 
-1677,13 +1691,13 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask - static inline unsigned int vsir_write_mask_component_count(uint32_t write_mask) - { - unsigned int count = vkd3d_popcount(write_mask & VKD3DSP_WRITEMASK_ALL); -- assert(1 <= count && count <= VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(1 <= count && count <= VKD3D_VEC4_SIZE); - return count; - } - - static inline unsigned int vkd3d_write_mask_from_component_count(unsigned int component_count) - { -- assert(component_count <= VKD3D_VEC4_SIZE); -+ VKD3D_ASSERT(component_count <= VKD3D_VEC4_SIZE); - return (VKD3DSP_WRITEMASK_0 << component_count) - 1; - } - -diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c -index a0a29ed30cb..11d87ac1d98 100644 ---- a/libs/vkd3d/libs/vkd3d/cache.c -+++ b/libs/vkd3d/libs/vkd3d/cache.c -@@ -69,7 +69,14 @@ static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry - static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache *cache, - struct shader_cache_entry *e) - { -- rb_put(&cache->tree, &e->h.hash, &e->entry); -+ const struct shader_cache_key k = -+ { -+ .hash = e->h.hash, -+ .key_size = e->h.key_size, -+ .key = e->payload -+ }; -+ -+ rb_put(&cache->tree, &k, &e->entry); - } - - int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) -diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c -index 95366d3441b..dcc7690876f 100644 ---- a/libs/vkd3d/libs/vkd3d/command.c -+++ b/libs/vkd3d/libs/vkd3d/command.c -@@ -93,7 +93,7 @@ VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue) - - vkd3d_mutex_lock(&queue->mutex); - -- assert(queue->vk_queue); -+ VKD3D_ASSERT(queue->vk_queue); - return queue->vk_queue; - } - -@@ -423,7 +423,7 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, - static const struct d3d12_root_parameter *root_signature_get_parameter( - const struct d3d12_root_signature *root_signature, unsigned int index) - { -- 
assert(index < root_signature->parameter_count); -+ VKD3D_ASSERT(index < root_signature->parameter_count); - return &root_signature->parameters[index]; - } - -@@ -431,7 +431,7 @@ static const struct d3d12_root_descriptor_table *root_signature_get_descriptor_t - const struct d3d12_root_signature *root_signature, unsigned int index) - { - const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); -- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); -+ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); - return &p->u.descriptor_table; - } - -@@ -439,7 +439,7 @@ static const struct d3d12_root_constant *root_signature_get_32bit_constants( - const struct d3d12_root_signature *root_signature, unsigned int index) - { - const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); -- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); -+ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); - return &p->u.constant; - } - -@@ -447,7 +447,7 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( - const struct d3d12_root_signature *root_signature, unsigned int index) - { - const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); -- assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV -+ VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV - || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV - || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV); - return p; -@@ -528,7 +528,7 @@ static void d3d12_fence_garbage_collect_vk_semaphores_locked(struct d3d12_fence - - if (current->u.binary.vk_fence) - WARN("Destroying potentially pending semaphore.\n"); -- assert(!current->u.binary.is_acquired); -+ VKD3D_ASSERT(!current->u.binary.is_acquired); - - VK_CALL(vkDestroySemaphore(device->vk_device, current->u.binary.vk_semaphore, NULL)); - 
fence->semaphores[i] = fence->semaphores[--fence->semaphore_count]; -@@ -599,7 +599,7 @@ static void d3d12_fence_remove_vk_semaphore(struct d3d12_fence *fence, struct vk - { - vkd3d_mutex_lock(&fence->mutex); - -- assert(semaphore->u.binary.is_acquired); -+ VKD3D_ASSERT(semaphore->u.binary.is_acquired); - - *semaphore = fence->semaphores[--fence->semaphore_count]; - -@@ -610,7 +610,7 @@ static void d3d12_fence_release_vk_semaphore(struct d3d12_fence *fence, struct v - { - vkd3d_mutex_lock(&fence->mutex); - -- assert(semaphore->u.binary.is_acquired); -+ VKD3D_ASSERT(semaphore->u.binary.is_acquired); - semaphore->u.binary.is_acquired = false; - - vkd3d_mutex_unlock(&fence->mutex); -@@ -1154,7 +1154,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) - - if (!(iface1 = (ID3D12Fence1 *)iface)) - return NULL; -- assert(iface1->lpVtbl == &d3d12_fence_vtbl); -+ VKD3D_ASSERT(iface1->lpVtbl == &d3d12_fence_vtbl); - return impl_from_ID3D12Fence1(iface1); - } - -@@ -1792,7 +1792,7 @@ static struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(I - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_command_allocator_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_allocator_vtbl); - return impl_from_ID3D12CommandAllocator(iface); - } - -@@ -1942,9 +1942,9 @@ static void d3d12_command_signature_decref(struct d3d12_command_signature *signa - } - - /* ID3D12CommandList */ --static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList5(ID3D12GraphicsCommandList5 *iface) -+static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList6(ID3D12GraphicsCommandList6 *iface) - { -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); - } - - static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) -@@ -2025,7 +2025,8 
@@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l - - static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, - const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct vkd3d_vulkan_info *vk_info, -- VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) -+ VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout, -+ struct d3d12_device *device) - { - bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); - VkPipelineStageFlags queue_shader_stages = 0; -@@ -2033,10 +2034,12 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) - { - queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT -- | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT -- | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT -- | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT - | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; -+ if (device->vk_info.geometry_shaders) -+ queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; -+ if (device->vk_info.tessellation_shaders) -+ queue_shader_stages |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT -+ | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; - } - if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) - queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; -@@ -2054,7 +2057,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - { - if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) - return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, -- resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); -+ resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout, device); - - *access_mask = VK_ACCESS_MEMORY_READ_BIT; - *stage_flags = 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; -@@ -2165,7 +2168,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, - } - - /* Handle read-only states. */ -- assert(!is_write_resource_state(state)); -+ VKD3D_ASSERT(!is_write_resource_state(state)); - - if (state & D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER) - { -@@ -2239,7 +2242,7 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 - VkPipelineStageFlags src_stage_mask, dst_stage_mask; - VkImageMemoryBarrier barrier; - -- assert(d3d12_resource_is_texture(resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(resource)); - - barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; - barrier.pNext = NULL; -@@ -2251,7 +2254,8 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 - VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; - - if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, -- resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) -+ resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, -+ &dst_stage_mask, &barrier.newLayout, list->device)) - { - FIXME("Unhandled state %#x.\n", resource->initial_state); - return; -@@ -2285,12 +2289,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l - } - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList6 *iface, - REFIID iid, void **object) - { - TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); - -- if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) -+ if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList6) -+ || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4) - || IsEqualGUID(iid, 
&IID_ID3D12GraphicsCommandList3) - || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) -@@ -2301,7 +2306,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - || IsEqualGUID(iid, &IID_ID3D12Object) - || IsEqualGUID(iid, &IID_IUnknown)) - { -- ID3D12GraphicsCommandList5_AddRef(iface); -+ ID3D12GraphicsCommandList6_AddRef(iface); - *object = iface; - return S_OK; - } -@@ -2312,9 +2317,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic - return E_NOINTERFACE; - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList5 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - unsigned int refcount = vkd3d_atomic_increment_u32(&list->refcount); - - TRACE("%p increasing refcount to %u.\n", list, refcount); -@@ -2327,9 +2332,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind - vkd3d_free(bindings->vk_uav_counter_views); - } - --static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList5 *iface) -+static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - unsigned int refcount = vkd3d_atomic_decrement_u32(&list->refcount); - - TRACE("%p decreasing refcount to %u.\n", list, refcount); -@@ -2355,66 +2360,67 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL - return refcount; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE 
d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList6 *iface, - REFGUID guid, UINT *data_size, void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_get_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList6 *iface, - REFGUID guid, UINT data_size, const void *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); - - return vkd3d_set_private_data(&list->private_store, guid, data_size, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList6 *iface, - REFGUID guid, const IUnknown *data) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); - - return vkd3d_set_private_data_interface(&list->private_store, guid, data); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList5 *iface, const WCHAR *name) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList6 *iface, const WCHAR *name) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); 
-+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); - - return name ? S_OK : E_INVALIDARG; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList5 *iface, REFIID iid, void **device) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList6 *iface, -+ REFIID iid, void **device) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); - - return d3d12_device_query_interface(list->device, iid, device); - } - --static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList5 *iface) -+static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p.\n", iface); - - return list->type; - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList5 *iface) -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList6 *iface) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - VkResult vr; - -@@ -2458,7 +2464,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL - static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - ID3D12PipelineState *initial_pipeline_state) - { -- ID3D12GraphicsCommandList5 *iface = &list->ID3D12GraphicsCommandList5_iface; -+ 
ID3D12GraphicsCommandList6 *iface = &list->ID3D12GraphicsCommandList6_iface; - - memset(list->strides, 0, sizeof(list->strides)); - list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; -@@ -2494,14 +2500,14 @@ static void d3d12_command_list_reset_state(struct d3d12_command_list *list, - - list->descriptor_heap_count = 0; - -- ID3D12GraphicsCommandList5_SetPipelineState(iface, initial_pipeline_state); -+ ID3D12GraphicsCommandList6_SetPipelineState(iface, initial_pipeline_state); - } - --static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList5 *iface, -+static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList6 *iface, - ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) - { - struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - HRESULT hr; - - TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", -@@ -2528,7 +2534,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL - return hr; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList6 *iface, - ID3D12PipelineState *pipeline_state) - { - FIXME("iface %p, pipeline_state %p stub!\n", iface, pipeline_state); -@@ -2538,7 +2544,7 @@ static bool d3d12_command_list_has_depth_stencil_view(struct d3d12_command_list - { - struct d3d12_graphics_pipeline_state *graphics; - -- assert(d3d12_pipeline_state_is_graphics(list->state)); -+ VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(list->state)); - graphics = &list->state->u.graphics; - - return graphics->dsv_format || (d3d12_pipeline_state_has_unknown_dsv_format(list->state) && list->dsv_format); -@@ -2973,30 
+2979,20 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list - enum vkd3d_pipeline_bind_point bind_point) - { - struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; -+ VkWriteDescriptorSet descriptor_writes[ARRAY_SIZE(bindings->push_descriptors)] = {0}; -+ VkDescriptorBufferInfo buffer_infos[ARRAY_SIZE(bindings->push_descriptors)] = {0}; - const struct d3d12_root_signature *root_signature = bindings->root_signature; -- VkWriteDescriptorSet *descriptor_writes = NULL, *current_descriptor_write; - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; -- VkDescriptorBufferInfo *buffer_infos = NULL, *current_buffer_info; - const struct d3d12_root_parameter *root_parameter; - struct vkd3d_push_descriptor *push_descriptor; - struct d3d12_device *device = list->device; - VkDescriptorBufferInfo *vk_buffer_info; -- unsigned int i, descriptor_count; -+ unsigned int i, descriptor_count = 0; - VkBufferView *vk_buffer_view; - - if (!bindings->push_descriptor_dirty_mask) - return; - -- descriptor_count = vkd3d_popcount(bindings->push_descriptor_dirty_mask); -- -- if (!(descriptor_writes = vkd3d_calloc(descriptor_count, sizeof(*descriptor_writes)))) -- return; -- if (!(buffer_infos = vkd3d_calloc(descriptor_count, sizeof(*buffer_infos)))) -- goto done; -- -- descriptor_count = 0; -- current_buffer_info = buffer_infos; -- current_descriptor_write = descriptor_writes; - for (i = 0; i < ARRAY_SIZE(bindings->push_descriptors); ++i) - { - if (!(bindings->push_descriptor_dirty_mask & (1u << i))) -@@ -3008,7 +3004,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list - if (root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV) - { - vk_buffer_view = NULL; -- vk_buffer_info = current_buffer_info; -+ vk_buffer_info = &buffer_infos[descriptor_count]; - vk_buffer_info->buffer = push_descriptor->u.cbv.vk_buffer; - vk_buffer_info->offset = push_descriptor->u.cbv.offset; - 
vk_buffer_info->range = VK_WHOLE_SIZE; -@@ -3019,21 +3015,15 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list - vk_buffer_info = NULL; - } - -- if (!vk_write_descriptor_set_from_root_descriptor(current_descriptor_write, -+ if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], - root_parameter, bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) - continue; - - ++descriptor_count; -- ++current_descriptor_write; -- ++current_buffer_info; - } - - VK_CALL(vkUpdateDescriptorSets(device->vk_device, descriptor_count, descriptor_writes, 0, NULL)); - bindings->push_descriptor_dirty_mask = 0; -- --done: -- vkd3d_free(descriptor_writes); -- vkd3d_free(buffer_infos); - } - - static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_command_list *list, -@@ -3063,7 +3053,7 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma - const struct vkd3d_shader_uav_counter_binding *uav_counter = &state->uav_counters.bindings[i]; - const VkBufferView *vk_uav_counter_views = bindings->vk_uav_counter_views; - -- assert(vk_uav_counter_views[i]); -+ VKD3D_ASSERT(vk_uav_counter_views[i]); - - vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; - vk_descriptor_writes[i].pNext = NULL; -@@ -3336,7 +3326,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list - return true; - - vk_render_pass = list->pso_render_pass; -- assert(vk_render_pass); -+ VKD3D_ASSERT(vk_render_pass); - - begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; - begin_desc.pNext = NULL; -@@ -3392,11 +3382,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList6 *iface, - UINT vertex_count_per_instance, UINT 
instance_count, UINT start_vertex_location, - UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " -@@ -3416,11 +3406,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCom - instance_count, start_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList6 *iface, - UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, - INT base_vertex_location, UINT start_instance_location) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " -@@ -3442,10 +3432,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap - instance_count, start_vertex_location, base_vertex_location, start_instance_location)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList6 *iface, - UINT x, UINT y, UINT z) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); -@@ -3461,10 +3451,10 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_Dispatch(ID3D12GraphicsCommandL - VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; - VkBufferCopy buffer_copy; -@@ -3476,9 +3466,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics - vk_procs = &list->device->vk_procs; - - dst_resource = unsafe_impl_from_ID3D12Resource(dst); -- assert(d3d12_resource_is_buffer(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); - src_resource = unsafe_impl_from_ID3D12Resource(src); -- assert(d3d12_resource_is_buffer(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); - - d3d12_command_list_track_resource_usage(list, dst_resource); - d3d12_command_list_track_resource_usage(list, src_resource); -@@ -3679,11 +3669,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - src_format->dxgi_format, src_format->vk_format, - dst_format->dxgi_format, dst_format->vk_format); - -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -- assert(!vkd3d_format_is_compressed(dst_format)); -- assert(!vkd3d_format_is_compressed(src_format)); -- assert(dst_format->byte_count == src_format->byte_count); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(!vkd3d_format_is_compressed(dst_format)); -+ 
VKD3D_ASSERT(!vkd3d_format_is_compressed(src_format)); -+ VKD3D_ASSERT(dst_format->byte_count == src_format->byte_count); - - buffer_image_copy.bufferOffset = 0; - buffer_image_copy.bufferRowLength = 0; -@@ -3727,11 +3717,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com - buffer_image_copy.imageSubresource.layerCount = layer_count; - dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - -- assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == -+ VKD3D_ASSERT(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == - d3d12_resource_desc_get_width(dst_desc, dst_miplevel_idx)); -- assert(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == -+ VKD3D_ASSERT(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == - d3d12_resource_desc_get_height(dst_desc, dst_miplevel_idx)); -- assert(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == -+ VKD3D_ASSERT(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == - d3d12_resource_desc_get_depth(dst_desc, dst_miplevel_idx)); - - VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer, -@@ -3746,11 +3736,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) - && box->back > box->front; - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList6 *iface, - const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, - const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *src_format, *dst_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3779,8 +3769,8 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_CopyTextureRegion(ID3D12Graphic - if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX - && dst->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT) - { -- assert(d3d12_resource_is_buffer(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); - - if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(list->device, - &src_resource->desc, dst->u.PlacedFootprint.Footprint.Format))) -@@ -3808,8 +3798,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - else if (src->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT - && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) - { -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_buffer(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); - - if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device, - &dst_resource->desc, src->u.PlacedFootprint.Footprint.Format))) -@@ -3837,8 +3827,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - else if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX - && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) - { -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); - - dst_format = dst_resource->format; - src_format = src_resource->format; -@@ -3871,10 +3861,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst, 
ID3D12Resource *src) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_format *dst_format, *src_format; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3897,8 +3887,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - - if (d3d12_resource_is_buffer(dst_resource)) - { -- assert(d3d12_resource_is_buffer(src_resource)); -- assert(src_resource->desc.Width == dst_resource->desc.Width); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); -+ VKD3D_ASSERT(src_resource->desc.Width == dst_resource->desc.Width); - - vk_buffer_copy.srcOffset = 0; - vk_buffer_copy.dstOffset = 0; -@@ -3912,10 +3902,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - dst_format = dst_resource->format; - src_format = src_resource->format; - -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -- assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); -- assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); -+ VKD3D_ASSERT(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); - - if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) - { -@@ -3941,7 +3931,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE 
*tile_region_start_coordinate, - const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, - D3D12_TILE_COPY_FLAGS flags) -@@ -3952,11 +3942,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand - buffer, buffer_offset, flags); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst, UINT dst_sub_resource_idx, - ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_format *src_format, *dst_format, *vk_format; - struct d3d12_resource *dst_resource, *src_resource; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -3972,8 +3962,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi - dst_resource = unsafe_impl_from_ID3D12Resource(dst); - src_resource = unsafe_impl_from_ID3D12Resource(src); - -- assert(d3d12_resource_is_texture(dst_resource)); -- assert(d3d12_resource_is_texture(src_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); - - d3d12_command_list_track_resource_usage(list, dst_resource); - d3d12_command_list_track_resource_usage(list, src_resource); -@@ -4019,10 +4009,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList6 *iface, - D3D12_PRIMITIVE_TOPOLOGY topology) - { -- struct d3d12_command_list 
*list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, topology %#x.\n", iface, topology); - -@@ -4033,11 +4023,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList6 *iface, - UINT viewport_count, const D3D12_VIEWPORT *viewports) - { - VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; - -@@ -4071,10 +4061,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo - VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList6 *iface, - UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -4099,10 +4089,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic - VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList5 *iface, 
-+static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList6 *iface, - const FLOAT blend_factor[4]) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); -@@ -4111,10 +4101,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics - VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList6 *iface, - UINT stencil_ref) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - - TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); -@@ -4123,11 +4113,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC - VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList6 *iface, - ID3D12PipelineState *pipeline_state) - { - struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); - -@@ -4178,10 +4168,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA - 
return 0; - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList6 *iface, - UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - bool have_aliasing_barriers = false, have_split_barriers = false; - const struct vkd3d_vk_device_procs *vk_procs; - const struct vkd3d_vulkan_info *vk_info; -@@ -4277,13 +4267,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - } - - if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, -- resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) -+ resource, list->vk_queue_flags, vk_info, &src_access_mask, -+ &src_stage_mask, &layout_before, list->device)) - { - FIXME("Unhandled state %#x.\n", state_before); - continue; - } - if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, -- resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) -+ resource, list->vk_queue_flags, vk_info, &dst_access_mask, -+ &dst_stage_mask, &layout_after, list->device)) - { - FIXME("Unhandled state %#x.\n", state_after); - continue; -@@ -4303,7 +4295,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - - resource = unsafe_impl_from_ID3D12Resource(uav->pResource); - vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, -- resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); -+ resource, list->vk_queue_flags, vk_info, &access_mask, -+ &stage_mask, &image_layout, list->device); - src_access_mask = dst_access_mask = access_mask; - src_stage_mask = dst_stage_mask = stage_mask; 
- layout_before = layout_after = image_layout; -@@ -4404,13 +4397,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC - WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList6 *iface, - ID3D12GraphicsCommandList *command_list) - { - FIXME("iface %p, command_list %p stub!\n", iface, command_list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList6 *iface, - UINT heap_count, ID3D12DescriptorHeap *const *heaps) - { - TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); -@@ -4436,10 +4429,10 @@ static void d3d12_command_list_set_root_signature(struct d3d12_command_list *lis - d3d12_command_list_invalidate_root_parameters(list, bind_point); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList6 *iface, - ID3D12RootSignature *root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4447,10 +4440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G - unsafe_impl_from_ID3D12RootSignature(root_signature)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList6 *iface, - ID3D12RootSignature 
*root_signature) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_signature %p.\n", iface, root_signature); - -@@ -4466,9 +4459,9 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - struct d3d12_descriptor_heap *descriptor_heap; - struct d3d12_desc *desc; - -- assert(root_signature_get_descriptor_table(root_signature, index)); -+ VKD3D_ASSERT(root_signature_get_descriptor_table(root_signature, index)); - -- assert(index < ARRAY_SIZE(bindings->descriptor_tables)); -+ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->descriptor_tables)); - desc = d3d12_desc_from_gpu_handle(base_descriptor); - - if (bindings->descriptor_tables[index] == desc) -@@ -4489,10 +4482,10 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l - bindings->descriptor_table_active_mask |= (uint64_t)1 << index; - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", - iface, root_parameter_index, debug_gpu_handle(base_descriptor)); -@@ -4501,10 +4494,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I - root_parameter_index, base_descriptor); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, - 
UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", - iface, root_parameter_index, debug_gpu_handle(base_descriptor)); -@@ -4526,10 +4519,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis - c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4538,10 +4531,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", - iface, root_parameter_index, data, dst_offset); -@@ -4550,10 +4543,10 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_SetGraphicsRoot32BitConstant(ID - root_parameter_index, dst_offset, 1, &data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4562,10 +4555,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID - root_parameter_index, dst_offset, constant_count, data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, - UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", - iface, root_parameter_index, constant_count, data, dst_offset); -@@ -4587,7 +4580,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - struct d3d12_resource *resource; - - root_parameter = root_signature_get_root_descriptor(root_signature, index); -- assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); -+ VKD3D_ASSERT(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); - - if (gpu_address) - { -@@ -4618,7 +4611,7 @@ static void 
d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); - VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - -- assert(index < ARRAY_SIZE(bindings->push_descriptors)); -+ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); - bindings->push_descriptors[index].u.cbv.vk_buffer = buffer_info.buffer; - bindings->push_descriptors[index].u.cbv.offset = buffer_info.offset; - bindings->push_descriptor_dirty_mask |= 1u << index; -@@ -4627,9 +4620,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4638,9 +4631,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4661,7 +4654,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li 
- VkBufferView vk_buffer_view; - - root_parameter = root_signature_get_root_descriptor(root_signature, index); -- assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); -+ VKD3D_ASSERT(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); - - /* FIXME: Re-use buffer views. */ - if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) -@@ -4691,7 +4684,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); - VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - -- assert(index < ARRAY_SIZE(bindings->push_descriptors)); -+ VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); - bindings->push_descriptors[index].u.vk_buffer_view = vk_buffer_view; - bindings->push_descriptor_dirty_mask |= 1u << index; - bindings->push_descriptor_active_mask |= 1u << index; -@@ -4699,9 +4692,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4711,9 +4704,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 
*iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4723,9 +4716,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4735,9 +4728,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi - } - - static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( -- ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) -+ ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - - TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", - iface, root_parameter_index, address); -@@ -4746,10 +4739,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV - root_parameter_index, address); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList6 *iface, - const D3D12_INDEX_BUFFER_VIEW *view) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_vk_device_procs *vk_procs; - struct d3d12_resource *resource; - enum VkIndexType index_type; -@@ -4789,10 +4782,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics - view->BufferLocation - resource->gpu_address, index_type)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList6 *iface, - UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct vkd3d_null_resources *null_resources; - struct vkd3d_gpu_va_allocator *gpu_va_allocator; - VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; -@@ -4814,15 +4807,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - return; - } - -- if (!views) -- { -- WARN("NULL \"views\" pointer specified.\n"); -- return; -- } -- - for (i = 0; i < view_count; ++i) - { -- if (views[i].BufferLocation) -+ if (views && views[i].BufferLocation) - { - resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); - buffers[i] = resource->u.vk_buffer; -@@ -4847,10 +4834,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi - d3d12_command_list_invalidate_current_pipeline(list); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList6 *iface, - 
UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; - VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; - VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; -@@ -4912,11 +4899,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsComm - VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList6 *iface, - UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, - BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct d3d12_rtv_desc *rtv_desc; - const struct d3d12_dsv_desc *dsv_desc; - VkFormat prev_dsv_format; -@@ -5117,12 +5104,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList6 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, - UINT rect_count, const D3D12_RECT *rects) - { - const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList6(iface); - const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference ds_reference; -@@ -5166,10 +5153,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra - &clear_value, rect_count, rects); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList6 *iface, - D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); - struct VkAttachmentDescription attachment_desc; - struct VkAttachmentReference color_reference; -@@ -5288,11 +5275,13 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, - unsigned int rect_count, const D3D12_RECT *rects) - { -+ const VkPhysicalDeviceLimits *device_limits = &list->device->vk_info.device_limits; - const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - unsigned int i, miplevel_idx, layer_count; - struct vkd3d_uav_clear_pipeline pipeline; - struct vkd3d_uav_clear_args clear_args; - const struct vkd3d_resource_view *view; -+ uint32_t count_x, count_y, count_z; - VkDescriptorImageInfo image_info; - D3D12_RECT full_rect, curr_rect; - VkWriteDescriptorSet write_set; -@@ -5383,18 +5372,32 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, - if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) - continue; - -- clear_args.offset.x = curr_rect.left; - 
clear_args.offset.y = curr_rect.top; -- clear_args.extent.width = curr_rect.right - curr_rect.left; - clear_args.extent.height = curr_rect.bottom - curr_rect.top; - -- VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, -- VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); -+ count_y = vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height); -+ count_z = vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth); -+ if (count_y > device_limits->maxComputeWorkGroupCount[1]) -+ FIXME("Group Y count %u exceeds max %u.\n", count_y, device_limits->maxComputeWorkGroupCount[1]); -+ if (count_z > device_limits->maxComputeWorkGroupCount[2]) -+ FIXME("Group Z count %u exceeds max %u.\n", count_z, device_limits->maxComputeWorkGroupCount[2]); -+ -+ do -+ { -+ clear_args.offset.x = curr_rect.left; -+ clear_args.extent.width = curr_rect.right - curr_rect.left; -+ -+ count_x = vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width); -+ count_x = min(count_x, device_limits->maxComputeWorkGroupCount[0]); - -- VK_CALL(vkCmdDispatch(list->vk_command_buffer, -- vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width), -- vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height), -- vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth))); -+ VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, -+ VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); -+ -+ VK_CALL(vkCmdDispatch(list->vk_command_buffer, count_x, count_y, count_z)); -+ -+ curr_rect.left += count_x * pipeline.group_size.width; -+ } -+ while (curr_rect.right > curr_rect.left); - } - } - -@@ -5434,15 +5437,59 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 
*iface, -+static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const struct vkd3d_resource_view *view, -+ struct d3d12_resource *resource, VkClearColorValue *colour) -+{ -+ struct vkd3d_texture_view_desc view_desc; -+ const struct vkd3d_format *uint_format; -+ struct vkd3d_view *uint_view; -+ -+ if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) -+ && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, colour))) -+ { -+ ERR("Unhandled format %#x.\n", view->format->dxgi_format); -+ return NULL; -+ } -+ -+ if (d3d12_resource_is_buffer(resource)) -+ { -+ if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_buffer, -+ uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) -+ { -+ ERR("Failed to create buffer view.\n"); -+ return NULL; -+ } -+ -+ return uint_view; -+ } -+ -+ memset(&view_desc, 0, sizeof(view_desc)); -+ view_desc.view_type = view->info.texture.vk_view_type; -+ view_desc.format = uint_format; -+ view_desc.miplevel_idx = view->info.texture.miplevel_idx; -+ view_desc.miplevel_count = 1; -+ view_desc.layer_idx = view->info.texture.layer_idx; -+ view_desc.layer_count = view->info.texture.layer_count; -+ view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; -+ view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; -+ -+ if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, -+ resource->u.vk_image, &view_desc, &uint_view)) -+ { -+ ERR("Failed to create image view.\n"); -+ return NULL; -+ } -+ -+ return uint_view; -+} -+ -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList6 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const UINT values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList6(iface); - struct vkd3d_view *descriptor, *uint_view = NULL; - struct d3d12_device *device = list->device; -- struct vkd3d_texture_view_desc view_desc; -- const struct vkd3d_format *uint_format; - const struct vkd3d_resource_view *view; - struct d3d12_resource *resource_impl; - VkClearColorValue colour; -@@ -5456,44 +5503,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - view = &descriptor->v; - memcpy(colour.uint32, values, sizeof(colour.uint32)); - -- if (view->format->type != VKD3D_FORMAT_TYPE_UINT) -+ if (view->format->type != VKD3D_FORMAT_TYPE_UINT -+ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) - { -- if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) -- && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) -- { -- ERR("Unhandled format %#x.\n", view->format->dxgi_format); -- return; -- } -- -- if (d3d12_resource_is_buffer(resource_impl)) -- { -- if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, -- uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) -- { -- ERR("Failed to create buffer view.\n"); -- return; -- } -- } -- else -- { -- memset(&view_desc, 0, sizeof(view_desc)); -- view_desc.view_type = view->info.texture.vk_view_type; -- view_desc.format = uint_format; -- view_desc.miplevel_idx = view->info.texture.miplevel_idx; -- view_desc.miplevel_count = 1; -- view_desc.layer_idx = view->info.texture.layer_idx; -- view_desc.layer_count = view->info.texture.layer_count; -- view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; -- view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; -- -- if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, -- &uint_view)) -- { -- ERR("Failed to create image view.\n"); -- return; -- } -- } -- descriptor = uint_view; -+ 
ERR("Failed to create UINT view.\n"); -+ return; - } - - d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); -@@ -5502,36 +5516,49 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID - vkd3d_view_decref(uint_view, device); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList6 *iface, - D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, - const float values[4], UINT rect_count, const D3D12_RECT *rects) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); -+ struct vkd3d_view *descriptor, *uint_view = NULL; -+ struct d3d12_device *device = list->device; -+ const struct vkd3d_resource_view *view; - struct d3d12_resource *resource_impl; - VkClearColorValue colour; -- struct vkd3d_view *view; - - TRACE("iface %p, gpu_handle %s, cpu_handle %s, resource %p, values %p, rect_count %u, rects %p.\n", - iface, debug_gpu_handle(gpu_handle), debug_cpu_handle(cpu_handle), resource, values, rect_count, rects); - - resource_impl = unsafe_impl_from_ID3D12Resource(resource); -- if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) -+ if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) - return; -+ view = &descriptor->v; - memcpy(colour.float32, values, sizeof(colour.float32)); - -- d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); -+ if (view->format->type == VKD3D_FORMAT_TYPE_SINT -+ && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) -+ { -+ ERR("Failed to create UINT view.\n"); -+ return; -+ } -+ -+ d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, 
rect_count, rects); -+ -+ if (uint_view) -+ vkd3d_view_decref(uint_view, device); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) - { - FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList6 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - VkQueryControlFlags flags = 0; -@@ -5558,10 +5585,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman - VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList6 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - const struct vkd3d_vk_device_procs *vk_procs; - -@@ -5603,12 +5630,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) - return sizeof(uint64_t); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList5 
*iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList6 *iface, - ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, - ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) - { - const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i, first, count; -@@ -5684,10 +5711,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); - const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; - const struct vkd3d_vk_device_procs *vk_procs; -@@ -5756,19 +5783,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList6 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList5 *iface, -+static 
void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList6 *iface, - UINT metadata, const void *data, UINT size) - { - FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList5 *iface) -+static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList6 *iface) - { - FIXME("iface %p stub!\n", iface); - } -@@ -5777,14 +5804,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN - STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); - STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList6 *iface, - ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, - UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) - { - struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); - struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); - struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; - const struct vkd3d_vk_device_procs *vk_procs; - unsigned int i; -@@ -5883,7 +5910,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC - d3d12_command_signature_decref(sig_impl); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE 
d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5896,7 +5923,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *dst_buffer, UINT64 dst_offset, - ID3D12Resource *src_buffer, UINT64 src_offset, - UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, -@@ -5909,20 +5936,20 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr - dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); - } - --static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, - FLOAT min, FLOAT max) - { - FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, - UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) - { - FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", - iface, sample_count, pixel_count, sample_positions); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList6 *iface, - 
ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, - ID3D12Resource *src_resource, UINT src_sub_resource_idx, - D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) -@@ -5934,16 +5961,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 - src_resource, src_sub_resource_idx, src_rect, format, mode); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList5 *iface, UINT mask) -+static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList6 *iface, UINT mask) - { - FIXME("iface %p, mask %#x stub!\n", iface, mask); - } - --static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList6 *iface, - UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER *parameters, - const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) - { -- struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); -+ struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); - struct d3d12_resource *resource; - unsigned int i; - -@@ -5956,13 +5983,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap - } - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList6 *iface, - ID3D12ProtectedResourceSession *protected_session) - { - FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList6 *iface, - UINT count, const D3D12_RENDER_PASS_RENDER_TARGET_DESC *render_targets, - 
const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC *depth_stencil, D3D12_RENDER_PASS_FLAGS flags) - { -@@ -5970,74 +5997,78 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsC - count, render_targets, depth_stencil, flags); - } - --static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList5 *iface) -+static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList6 *iface) - { - FIXME("iface %p stub!\n", iface); - } - --static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList6 *iface, - ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) - { - FIXME("iface %p, meta_command %p, parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface, - meta_command, parameters_data, (uintptr_t)data_size_in_bytes); - } - --static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList6 *iface, - ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) - { - FIXME("iface %p, meta_command %p, parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface, - meta_command, parameters_data, (uintptr_t)data_size_in_bytes); - } - --static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface, - const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *desc, UINT count, - const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *postbuild_info_descs) - { - FIXME("iface %p, desc %p, count %u, postbuild_info_descs %p stub!\n", iface, desc, count, postbuild_info_descs); - 
} - --static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(ID3D12GraphicsCommandList5 *iface, -- const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, -+static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo( -+ ID3D12GraphicsCommandList6 *iface, const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, - UINT structures_count, const D3D12_GPU_VIRTUAL_ADDRESS *src_structure_data) - { - FIXME("iface %p, desc %p, structures_count %u, src_structure_data %p stub!\n", - iface, desc, structures_count, src_structure_data); - } - --static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface, -- D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data, -- D3D12_GPU_VIRTUAL_ADDRESS src_structure_data, -+static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface, -+ D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data, D3D12_GPU_VIRTUAL_ADDRESS src_structure_data, - D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode) - { - FIXME("iface %p, dst_structure_data %#"PRIx64", src_structure_data %#"PRIx64", mode %u stub!\n", - iface, dst_structure_data, src_structure_data, mode); - } - --static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList6 *iface, - ID3D12StateObject *state_object) - { - FIXME("iface %p, state_object %p stub!\n", iface, state_object); - } - --static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList6 *iface, - const D3D12_DISPATCH_RAYS_DESC *desc) - { - FIXME("iface %p, desc %p stub!\n", iface, desc); - } - --static void STDMETHODCALLTYPE 
d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList6 *iface, - D3D12_SHADING_RATE rate, const D3D12_SHADING_RATE_COMBINER *combiners) - { - FIXME("iface %p, rate %#x, combiners %p stub!\n", iface, rate, combiners); - } - --static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList5 *iface, -+static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList6 *iface, - ID3D12Resource *rate_image) - { - FIXME("iface %p, rate_image %p stub!\n", iface, rate_image); - } - --static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl = -+static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh(ID3D12GraphicsCommandList6 *iface, UINT x, UINT y, UINT z) -+{ -+ FIXME("iface %p, x %u, y %u, z %u stub!\n", iface, x, y, z); -+} -+ -+static const struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl = - { - /* IUnknown methods */ - d3d12_command_list_QueryInterface, -@@ -6128,14 +6159,16 @@ static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl = - /* ID3D12GraphicsCommandList5 methods */ - d3d12_command_list_RSSetShadingRate, - d3d12_command_list_RSSetShadingRateImage, -+ /* ID3D12GraphicsCommandList6 methods */ -+ d3d12_command_list_DispatchMesh, - }; - - static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); -- return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface); -+ VKD3D_ASSERT(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); -+ return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); - } - - static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device 
*device, -@@ -6144,7 +6177,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d - { - HRESULT hr; - -- list->ID3D12GraphicsCommandList5_iface.lpVtbl = &d3d12_command_list_vtbl; -+ list->ID3D12GraphicsCommandList6_iface.lpVtbl = &d3d12_command_list_vtbl; - list->refcount = 1; - - list->type = type; -@@ -6748,7 +6781,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu - } - - vk_semaphore = fence->timeline_semaphore; -- assert(vk_semaphore); -+ VKD3D_ASSERT(vk_semaphore); - } - else - { -@@ -6821,7 +6854,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu - return hr; - - vk_semaphore = fence->timeline_semaphore; -- assert(vk_semaphore); -+ VKD3D_ASSERT(vk_semaphore); - - return vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, - vk_semaphore, fence, timeline_value, vkd3d_queue); -@@ -6990,7 +7023,7 @@ static HRESULT d3d12_command_queue_wait_locked(struct d3d12_command_queue *comma - * until we have submitted, so the semaphore cannot be destroyed before the call to vkQueueSubmit. 
*/ - vkd3d_mutex_unlock(&fence->mutex); - -- assert(fence->timeline_semaphore); -+ VKD3D_ASSERT(fence->timeline_semaphore); - timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; - timeline_submit_info.pNext = NULL; - timeline_submit_info.waitSemaphoreValueCount = 1; -@@ -7254,7 +7287,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * - - queue->is_flushing = true; - -- assert(queue->aux_op_queue.count == 0); -+ VKD3D_ASSERT(queue->aux_op_queue.count == 0); - - while (queue->op_queue.count != 0) - { -@@ -7544,7 +7577,7 @@ struct d3d12_command_signature *unsafe_impl_from_ID3D12CommandSignature(ID3D12Co - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_command_signature_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_signature_vtbl); - return CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface); - } - -diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c -index cfc9c5f5ed3..01841c89692 100644 ---- a/libs/vkd3d/libs/vkd3d/device.c -+++ b/libs/vkd3d/libs/vkd3d/device.c -@@ -76,6 +76,14 @@ static const char * const required_device_extensions[] = - VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, - }; - -+/* In general we don't want to enable Vulkan beta extensions, but make an -+ * exception for VK_KHR_portability_subset because we draw no real feature from -+ * it, but it's still useful to be able to develop for MoltenVK without being -+ * spammed with validation errors. 
*/ -+#ifndef VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME -+#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" -+#endif -+ - static const struct vkd3d_optional_extension_info optional_device_extensions[] = - { - /* KHR extensions */ -@@ -85,6 +93,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), - VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2), - VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), -+ VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset), - VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), - VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), - VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), -@@ -92,7 +101,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = - VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), - VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), - VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), -- VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), -+ VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), - VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), - VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), - VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), -@@ -299,7 +308,7 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio - for (i = 0; i < required_extension_count; ++i) - { - if (!has_extension(extensions, count, required_extensions[i])) -- ERR("Required %s extension %s is not supported.\n", -+ WARN("Required %s extension %s is not supported.\n", - extension_type, debugstr_a(required_extensions[i])); - ++extension_count; - } -@@ -327,12 +336,12 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio - for (i = 0; i < user_extension_count; ++i) - { - if (!has_extension(extensions, 
count, user_extensions[i])) -- ERR("Required user %s extension %s is not supported.\n", -+ WARN("Required user %s extension %s is not supported.\n", - extension_type, debugstr_a(user_extensions[i])); - ++extension_count; - } - -- assert(!optional_user_extension_count || user_extension_supported); -+ VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported); - for (i = 0; i < optional_user_extension_count; ++i) - { - if (has_extension(extensions, count, optional_user_extensions[i])) -@@ -394,7 +403,7 @@ static unsigned int vkd3d_enable_extensions(const char *extensions[], - { - extension_count = vkd3d_append_extension(extensions, extension_count, user_extensions[i]); - } -- assert(!optional_user_extension_count || user_extension_supported); -+ VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported); - for (i = 0; i < optional_user_extension_count; ++i) - { - if (!user_extension_supported[i]) -@@ -575,7 +584,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - - if (!create_info->pfn_signal_event) - { -- ERR("Invalid signal event function pointer.\n"); -+ WARN("Invalid signal event function pointer.\n"); - return E_INVALIDARG; - } - if (!create_info->pfn_create_thread != !create_info->pfn_join_thread) -@@ -585,7 +594,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, - } - if (create_info->wchar_size != 2 && create_info->wchar_size != 4) - { -- ERR("Unexpected WCHAR size %zu.\n", create_info->wchar_size); -+ WARN("Unexpected WCHAR size %zu.\n", create_info->wchar_size); - return E_INVALIDARG; - } - -@@ -822,114 +831,90 @@ struct vkd3d_physical_device_info - VkPhysicalDeviceFeatures2 features2; - }; - --static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) -+static void vkd3d_chain_physical_device_info_structures(struct vkd3d_physical_device_info *info, -+ struct d3d12_device *device) - { -- const struct vkd3d_vk_instance_procs 
*vk_procs = &device->vkd3d_instance->vk_procs; -- VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; -- VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; -- VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; -- VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; -- VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; -- VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; -- VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; -- VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; -- VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; -- VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; -- VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features; -- VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *mutable_features; -- VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; -- VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; -- VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; -- VkPhysicalDevice physical_device = device->vk_physical_device; -- VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; -- VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; - struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -- VkPhysicalDeviceSubgroupProperties *subgroup_properties; -- -- memset(info, 0, sizeof(*info)); -- conditional_rendering_features = &info->conditional_rendering_features; -- depth_clip_features = &info->depth_clip_features; -- descriptor_indexing_features = &info->descriptor_indexing_features; -- fragment_shader_interlock_features = &info->fragment_shader_interlock_features; -- robustness2_features = &info->robustness2_features; -- descriptor_indexing_properties = &info->descriptor_indexing_properties; -- maintenance3_properties = 
&info->maintenance3_properties; -- demote_features = &info->demote_features; -- buffer_alignment_features = &info->texel_buffer_alignment_features; -- buffer_alignment_properties = &info->texel_buffer_alignment_properties; -- vertex_divisor_features = &info->vertex_divisor_features; -- vertex_divisor_properties = &info->vertex_divisor_properties; -- timeline_semaphore_features = &info->timeline_semaphore_features; -- mutable_features = &info->mutable_features; -- formats4444_features = &info->formats4444_features; -- xfb_features = &info->xfb_features; -- xfb_properties = &info->xfb_properties; -- subgroup_properties = &info->subgroup_properties; - -- info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; -+ info->features2.pNext = NULL; - -- conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; - if (vulkan_info->EXT_conditional_rendering) -- vk_prepend_struct(&info->features2, conditional_rendering_features); -- depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->conditional_rendering_features); - if (vulkan_info->EXT_depth_clip_enable) -- vk_prepend_struct(&info->features2, depth_clip_features); -- descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->depth_clip_features); - if (vulkan_info->EXT_descriptor_indexing) -- vk_prepend_struct(&info->features2, descriptor_indexing_features); -- fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->descriptor_indexing_features); - if (vulkan_info->EXT_fragment_shader_interlock) -- vk_prepend_struct(&info->features2, fragment_shader_interlock_features); -- robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; -+ 
vk_prepend_struct(&info->features2, &info->fragment_shader_interlock_features); - if (vulkan_info->EXT_robustness2) -- vk_prepend_struct(&info->features2, robustness2_features); -- demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->robustness2_features); - if (vulkan_info->EXT_shader_demote_to_helper_invocation) -- vk_prepend_struct(&info->features2, demote_features); -- buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->demote_features); - if (vulkan_info->EXT_texel_buffer_alignment) -- vk_prepend_struct(&info->features2, buffer_alignment_features); -- xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->texel_buffer_alignment_features); - if (vulkan_info->EXT_transform_feedback) -- vk_prepend_struct(&info->features2, xfb_features); -- vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->xfb_features); - if (vulkan_info->EXT_vertex_attribute_divisor) -- vk_prepend_struct(&info->features2, vertex_divisor_features); -- timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; -+ vk_prepend_struct(&info->features2, &info->vertex_divisor_features); - if (vulkan_info->KHR_timeline_semaphore) -- vk_prepend_struct(&info->features2, timeline_semaphore_features); -- mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; -+ vk_prepend_struct(&info->features2, &info->timeline_semaphore_features); - if (vulkan_info->EXT_mutable_descriptor_type) -- vk_prepend_struct(&info->features2, mutable_features); -- formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -+ 
vk_prepend_struct(&info->features2, &info->mutable_features); - if (vulkan_info->EXT_4444_formats) -- vk_prepend_struct(&info->features2, formats4444_features); -- -- if (vulkan_info->KHR_get_physical_device_properties2) -- VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -- else -- VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); -+ vk_prepend_struct(&info->features2, &info->formats4444_features); - -- info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; -+ info->properties2.pNext = NULL; - -- maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; - if (vulkan_info->KHR_maintenance3) -- vk_prepend_struct(&info->properties2, maintenance3_properties); -- descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->maintenance3_properties); - if (vulkan_info->EXT_descriptor_indexing) -- vk_prepend_struct(&info->properties2, descriptor_indexing_properties); -- buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->descriptor_indexing_properties); - if (vulkan_info->EXT_texel_buffer_alignment) -- vk_prepend_struct(&info->properties2, buffer_alignment_properties); -- xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->texel_buffer_alignment_properties); - if (vulkan_info->EXT_transform_feedback) -- vk_prepend_struct(&info->properties2, xfb_properties); -- vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; -+ vk_prepend_struct(&info->properties2, &info->xfb_properties); - if (vulkan_info->EXT_vertex_attribute_divisor) -- vk_prepend_struct(&info->properties2, vertex_divisor_properties); -- 
subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; -+ vk_prepend_struct(&info->properties2, &info->vertex_divisor_properties); - if (d3d12_device_environment_is_vulkan_min_1_1(device)) -- vk_prepend_struct(&info->properties2, subgroup_properties); -+ vk_prepend_struct(&info->properties2, &info->subgroup_properties); -+} -+ -+static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) -+{ -+ const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; -+ VkPhysicalDevice physical_device = device->vk_physical_device; -+ struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; -+ -+ memset(info, 0, sizeof(*info)); -+ -+ info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; -+ info->conditional_rendering_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; -+ info->depth_clip_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; -+ info->descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; -+ info->fragment_shader_interlock_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; -+ info->robustness2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; -+ info->demote_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; -+ info->texel_buffer_alignment_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; -+ info->xfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; -+ info->vertex_divisor_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; -+ info->timeline_semaphore_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; -+ info->mutable_features.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; -+ info->formats4444_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; -+ -+ info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; -+ info->maintenance3_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; -+ info->descriptor_indexing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; -+ info->texel_buffer_alignment_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; -+ info->xfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; -+ info->vertex_divisor_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; -+ info->subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; -+ -+ vkd3d_chain_physical_device_info_structures(info, device); -+ -+ if (vulkan_info->KHR_get_physical_device_properties2) -+ VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); -+ else -+ VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); - - if (vulkan_info->KHR_get_physical_device_properties2) - VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); -@@ -1522,7 +1507,7 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct - for (i = 0; i < ARRAY_SIZE(additional_formats); ++i) - { - format = vkd3d_get_format(device, additional_formats[i], false); -- assert(format); -+ VKD3D_ASSERT(format); - - VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, format->vk_format, &properties)); - if (!((properties.linearTilingFeatures | properties.optimalTilingFeatures) & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) -@@ -1634,6 +1619,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - - vulkan_info->device_limits = 
physical_device_info->properties2.properties.limits; - vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; -+ vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; -+ vulkan_info->tessellation_shaders = physical_device_info->features2.features.tessellationShader; - vulkan_info->sparse_binding = features->sparseBinding; - vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; - vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; -@@ -1829,6 +1816,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, - vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->properties2.properties.limits); - -+ vkd3d_chain_physical_device_info_structures(physical_device_info, device); -+ - return S_OK; - } - -@@ -2166,7 +2155,7 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, - vkd3d_free(extensions); - if (vr < 0) - { -- ERR("Failed to create Vulkan device, vr %d.\n", vr); -+ WARN("Failed to create Vulkan device, vr %d.\n", vr); - return hresult_from_vk_result(vr); - } - -@@ -2552,11 +2541,13 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device) - VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); - pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; - pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; -- device->vk_pool_count = 2; -+ pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; -+ pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); -+ device->vk_pool_count = 3; - return; - } - -- assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); -+ VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6); - pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; - pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, - VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); -@@ 
-3128,8 +3119,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *i - initial_pipeline_state, &object))) - return hr; - -- return return_interface(&object->ID3D12GraphicsCommandList5_iface, -- &IID_ID3D12GraphicsCommandList5, riid, command_list); -+ return return_interface(&object->ID3D12GraphicsCommandList6_iface, -+ &IID_ID3D12GraphicsCommandList6, riid, command_list); - } - - /* Direct3D feature levels restrict which formats can be optionally supported. */ -@@ -3806,7 +3797,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 - return E_INVALIDARG; - } - -- data->UnalignedBlockTexturesSupported = FALSE; -+ /* Vulkan does not restrict block texture alignment. */ -+ data->UnalignedBlockTexturesSupported = TRUE; - - TRACE("Unaligned block texture support %#x.\n", data->UnalignedBlockTexturesSupported); - return S_OK; -@@ -5262,7 +5254,7 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface) - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_device_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_device_vtbl); - return impl_from_ID3D12Device9(iface); - } - -diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c -index c897d9f2c5a..6d6820d3752 100644 ---- a/libs/vkd3d/libs/vkd3d/resource.c -+++ b/libs/vkd3d/libs/vkd3d/resource.c -@@ -312,7 +312,7 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface) - - TRACE("%p increasing refcount to %u.\n", heap, refcount); - -- assert(!heap->is_private); -+ VKD3D_ASSERT(!heap->is_private); - - return refcount; - } -@@ -443,7 +443,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_heap_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_heap_vtbl); - return impl_from_ID3D12Heap(iface); - } - -@@ -950,8 +950,8 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, - bool tiled; - HRESULT hr; - 
-- assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); -- assert(d3d12_resource_validate_desc(desc, device) == S_OK); -+ VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); -+ VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK); - - if (!desc->MipLevels) - { -@@ -1044,7 +1044,7 @@ static bool d3d12_resource_validate_box(const struct d3d12_resource *resource, - depth = d3d12_resource_desc_get_depth(&resource->desc, mip_level); - - vkd3d_format = resource->format; -- assert(vkd3d_format); -+ VKD3D_ASSERT(vkd3d_format); - width_mask = vkd3d_format->block_width - 1; - height_mask = vkd3d_format->block_height - 1; - -@@ -1162,7 +1162,7 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 - - if (d3d12_resource_is_buffer(resource)) - { -- assert(subresource_count == 1); -+ VKD3D_ASSERT(subresource_count == 1); - - VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); - if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) -@@ -1381,7 +1381,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource2 *iface - - static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource) - { -- assert(resource->heap->map_ptr); -+ VKD3D_ASSERT(resource->heap->map_ptr); - return (uint8_t *)resource->heap->map_ptr + resource->heap_offset; - } - -@@ -1771,7 +1771,7 @@ struct d3d12_resource *unsafe_impl_from_ID3D12Resource(ID3D12Resource *iface) - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl); - return impl_from_ID3D12Resource(iface); - } - -@@ -1809,14 +1809,6 @@ static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC1 *d - return false; - } - -- if (align(desc->Width, format->block_width) != desc->Width -- || align(desc->Height, format->block_height) != desc->Height) -- { -- 
WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", -- desc->Width, desc->Height, desc->Format); -- return false; -- } -- - return true; - } - -@@ -2173,7 +2165,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, - - if (heap_offset > heap->desc.SizeInBytes || requirements.size > heap->desc.SizeInBytes - heap_offset) - { -- ERR("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n", -+ WARN("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n", - heap_offset, requirements.size, heap->desc.SizeInBytes); - return E_INVALIDARG; - } -@@ -2192,7 +2184,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, - goto allocate_memory; - } - -- /* Syncronisation is not required for binding, but vkMapMemory() may be called -+ /* Synchronisation is not required for binding, but vkMapMemory() may be called - * from another thread and it requires exclusive access. 
*/ - vkd3d_mutex_lock(&heap->mutex); - -@@ -2414,7 +2406,7 @@ static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_ - { - struct vkd3d_view *view; - -- assert(magic); -+ VKD3D_ASSERT(magic); - - if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) - { -@@ -2544,7 +2536,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea - writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; - break; - default: -- assert(false); -+ VKD3D_ASSERT(false); - break; - } - if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) -@@ -2733,7 +2725,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struc - { - struct d3d12_desc tmp; - -- assert(dst != src); -+ VKD3D_ASSERT(dst != src); - - tmp.s.u.object = d3d12_desc_get_object_ref(src, device); - descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); -@@ -2756,7 +2748,7 @@ static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12 - if (properties->storageTexelBufferOffsetSingleTexelAlignment - && properties->uniformTexelBufferOffsetSingleTexelAlignment) - { -- assert(!vkd3d_format_is_compressed(format)); -+ VKD3D_ASSERT(!vkd3d_format_is_compressed(format)); - return min(format->byte_count, alignment); - } - -@@ -2856,7 +2848,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, - return false; - } - -- assert(d3d12_resource_is_buffer(resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(resource)); - - return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, - format, offset * element_size, size * element_size, view); -@@ -2987,7 +2979,7 @@ static VkComponentSwizzle swizzle_vk_component(const VkComponentMapping *compone - break; - } - -- assert(component != VK_COMPONENT_SWIZZLE_IDENTITY); -+ VKD3D_ASSERT(component != VK_COMPONENT_SWIZZLE_IDENTITY); - return component; - } - -@@ -3519,8 +3511,8 @@ static void vkd3d_create_buffer_uav(struct 
d3d12_desc *descriptor, struct d3d12_ - { - const struct vkd3d_format *format; - -- assert(d3d12_resource_is_buffer(counter_resource)); -- assert(desc->u.Buffer.StructureByteStride); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(counter_resource)); -+ VKD3D_ASSERT(desc->u.Buffer.StructureByteStride); - - format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); - if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, -@@ -3640,7 +3632,7 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, - } - - resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); -- assert(d3d12_resource_is_buffer(resource)); -+ VKD3D_ASSERT(d3d12_resource_is_buffer(resource)); - return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, - gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view); - } -@@ -3912,7 +3904,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev - vkd3d_desc.layer_count = resource->desc.DepthOrArraySize; - } - -- assert(d3d12_resource_is_texture(resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(resource)); - - if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view)) - return; -@@ -3998,7 +3990,7 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev - } - } - -- assert(d3d12_resource_is_texture(resource)); -+ VKD3D_ASSERT(d3d12_resource_is_texture(resource)); - - if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) - return; -@@ -4357,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript - return hr; - - descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); -- d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); -+ if (FAILED(hr = 
d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) -+ { -+ vkd3d_private_store_destroy(&descriptor_heap->private_store); -+ return hr; -+ } - vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); - - d3d12_device_add_ref(descriptor_heap->device = device); -@@ -4563,7 +4559,7 @@ struct d3d12_query_heap *unsafe_impl_from_ID3D12QueryHeap(ID3D12QueryHeap *iface - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_query_heap_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_query_heap_vtbl); - return impl_from_ID3D12QueryHeap(iface); - } - -diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c -index 7197193523d..d9d200e4850 100644 ---- a/libs/vkd3d/libs/vkd3d/state.c -+++ b/libs/vkd3d/libs/vkd3d/state.c -@@ -194,7 +194,7 @@ struct d3d12_root_signature *unsafe_impl_from_ID3D12RootSignature(ID3D12RootSign - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_root_signature_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_root_signature_vtbl); - return impl_from_ID3D12RootSignature(iface); - } - -@@ -345,15 +345,93 @@ struct d3d12_root_signature_info - unsigned int sampler_unbounded_range_count; - - size_t cost; -+ -+ struct d3d12_root_signature_info_range -+ { -+ enum vkd3d_shader_descriptor_type type; -+ unsigned int space; -+ unsigned int base_idx; -+ unsigned int count; -+ D3D12_SHADER_VISIBILITY visibility; -+ } *ranges; -+ size_t range_count, range_capacity; - }; - -+static HRESULT d3d12_root_signature_info_add_range(struct d3d12_root_signature_info *info, -+ enum vkd3d_shader_descriptor_type type, D3D12_SHADER_VISIBILITY visibility, -+ unsigned int space, unsigned int base_idx, unsigned int count) -+{ -+ struct d3d12_root_signature_info_range *range; -+ -+ if (!vkd3d_array_reserve((void **)&info->ranges, &info->range_capacity, info->range_count + 1, -+ sizeof(*info->ranges))) -+ return E_OUTOFMEMORY; -+ -+ range = &info->ranges[info->range_count++]; -+ range->type = type; -+ range->space = space; 
-+ range->base_idx = base_idx; -+ range->count = count; -+ range->visibility = visibility; -+ -+ return S_OK; -+} -+ -+static int d3d12_root_signature_info_range_compare(const void *a, const void *b) -+{ -+ const struct d3d12_root_signature_info_range *range_a = a, *range_b = b; -+ int ret; -+ -+ if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) -+ return ret; -+ -+ if ((ret = vkd3d_u32_compare(range_a->space, range_b->space))) -+ return ret; -+ -+ return vkd3d_u32_compare(range_a->base_idx, range_b->base_idx); -+} -+ -+static HRESULT d3d12_root_signature_info_range_validate(const struct d3d12_root_signature_info_range *ranges, -+ unsigned int count, D3D12_SHADER_VISIBILITY visibility) -+{ -+ const struct d3d12_root_signature_info_range *range, *next; -+ unsigned int i = 0, j; -+ -+ while (i < count) -+ { -+ range = &ranges[i]; -+ -+ for (j = i + 1; j < count; ++j) -+ { -+ next = &ranges[j]; -+ -+ if (range->visibility != D3D12_SHADER_VISIBILITY_ALL -+ && next->visibility != D3D12_SHADER_VISIBILITY_ALL -+ && range->visibility != next->visibility) -+ continue; -+ -+ if (range->type == next->type && range->space == next->space -+ && range->base_idx + range->count > next->base_idx) -+ return E_INVALIDARG; -+ -+ break; -+ } -+ -+ i = j; -+ } -+ -+ return S_OK; -+} -+ - static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, -- const D3D12_ROOT_DESCRIPTOR_TABLE *table, bool use_array) -+ const D3D12_ROOT_PARAMETER *param, bool use_array) - { - bool cbv_unbounded_range = false, srv_unbounded_range = false, uav_unbounded_range = false; -+ const D3D12_ROOT_DESCRIPTOR_TABLE *table = ¶m->u.DescriptorTable; - bool sampler_unbounded_range = false; - bool unbounded = false; - unsigned int i, count; -+ HRESULT hr; - - for (i = 0; i < table->NumDescriptorRanges; ++i) - { -@@ -381,6 +459,12 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig - } - - count = range->NumDescriptors; -+ -+ if 
(FAILED(hr = d3d12_root_signature_info_add_range(info, -+ vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType), -+ param->ShaderVisibility, range->RegisterSpace, range->BaseShaderRegister, count))) -+ return hr; -+ - if (range->NumDescriptors == UINT_MAX) - { - unbounded = true; -@@ -453,8 +537,8 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i - { - case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: - if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info, -- &p->u.DescriptorTable, use_array))) -- return hr; -+ p, use_array))) -+ goto done; - ++info->cost; - break; - -@@ -463,35 +547,80 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i - ++info->cbv_count; - ++info->binding_count; - info->cost += 2; -+ if (FAILED(hr = d3d12_root_signature_info_add_range(info, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility, -+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) -+ goto done; - break; -+ - case D3D12_ROOT_PARAMETER_TYPE_SRV: - ++info->root_descriptor_count; - ++info->srv_count; - ++info->binding_count; - info->cost += 2; -+ if (FAILED(hr = d3d12_root_signature_info_add_range(info, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, p->ShaderVisibility, -+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) -+ goto done; - break; -+ - case D3D12_ROOT_PARAMETER_TYPE_UAV: - ++info->root_descriptor_count; - ++info->uav_count; - ++info->binding_count; - info->cost += 2; -+ if (FAILED(hr = d3d12_root_signature_info_add_range(info, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, p->ShaderVisibility, -+ p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) -+ goto done; - break; - - case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: - ++info->root_constant_count; - info->cost += p->u.Constants.Num32BitValues; -+ if (FAILED(hr = d3d12_root_signature_info_add_range(info, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility, -+ p->u.Constants.RegisterSpace, 
p->u.Constants.ShaderRegister, 1))) -+ goto done; - break; - - default: - FIXME("Unhandled type %#x for parameter %u.\n", p->ParameterType, i); -- return E_NOTIMPL; -+ hr = E_NOTIMPL; -+ goto done; - } - } - - info->binding_count += desc->NumStaticSamplers; - info->sampler_count += desc->NumStaticSamplers; - -- return S_OK; -+ for (i = 0; i < desc->NumStaticSamplers; ++i) -+ { -+ const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; -+ -+ if (FAILED(hr = d3d12_root_signature_info_add_range(info, -+ VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->ShaderVisibility, -+ s->RegisterSpace, s->ShaderRegister, 1))) -+ goto done; -+ } -+ -+ qsort(info->ranges, info->range_count, sizeof(*info->ranges), -+ d3d12_root_signature_info_range_compare); -+ -+ for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i) -+ { -+ if (FAILED(hr = d3d12_root_signature_info_range_validate(info->ranges, info->range_count, i))) -+ goto done; -+ } -+ -+ hr = S_OK; -+done: -+ vkd3d_free(info->ranges); -+ info->ranges = NULL; -+ info->range_count = 0; -+ info->range_capacity = 0; -+ -+ return hr; - } - - static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signature *root_signature, -@@ -509,14 +638,18 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat - for (i = 0; i < desc->NumParameters; ++i) - { - const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; -+ D3D12_SHADER_VISIBILITY visibility; -+ - if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) - continue; - -- assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); -- push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL -- : stage_flags_from_visibility(p->ShaderVisibility); -- push_constants[p->ShaderVisibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t); -+ visibility = use_vk_heaps ? 
D3D12_SHADER_VISIBILITY_ALL : p->ShaderVisibility; -+ VKD3D_ASSERT(visibility <= D3D12_SHADER_VISIBILITY_PIXEL); -+ -+ push_constants[visibility].stageFlags = stage_flags_from_visibility(visibility); -+ push_constants[visibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t); - } -+ - if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) - { - /* When D3D12_SHADER_VISIBILITY_ALL is used we use a single push -@@ -645,7 +778,7 @@ static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_ro - return S_OK; - } - --static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, -+static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, - bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, - unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) -@@ -670,33 +803,38 @@ static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature * - } - - if (context->unbounded_offset != UINT_MAX) -- d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); -+ -+ return S_OK; - } - --static uint32_t d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, -+static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, - enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, - unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, -- enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) -+ enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context, -+ uint32_t *first_binding) - { -- 
uint32_t first_binding; - unsigned int i; -+ HRESULT hr; - - is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; - duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV - || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) - && duplicate_descriptors; - -- first_binding = context->descriptor_binding; -+ *first_binding = context->descriptor_binding; - for (i = 0; i < binding_count; ++i) - { -- if (duplicate_descriptors) -- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -- base_register_idx + i, true, shader_visibility, 1, context); -+ if (duplicate_descriptors -+ && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, -+ register_space, base_register_idx + i, true, shader_visibility, 1, context))) -+ return hr; - -- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -- base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context); -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, -+ base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) -+ return hr; - } -- return first_binding; -+ return S_OK; - } - - static uint32_t vkd3d_descriptor_magic_from_d3d12(D3D12_DESCRIPTOR_RANGE_TYPE type) -@@ -764,6 +902,7 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r - enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); - bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; - enum vkd3d_shader_descriptor_type descriptor_type = range->type; -+ HRESULT hr; - - if (range->descriptor_count == UINT_MAX) - context->unbounded_offset = range->offset; -@@ -775,8 +914,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r - return E_NOTIMPL; - ++context->current_binding; - -- 
d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, true, shader_visibility, range->vk_binding_count, context); -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -+ range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) -+ return hr; - } - - if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, -@@ -784,8 +924,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r - return E_NOTIMPL; - ++context->current_binding; - -- d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -- range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context); -+ if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, -+ range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) -+ return hr; - - context->unbounded_offset = UINT_MAX; - -@@ -955,20 +1096,6 @@ static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_r - descriptor_offset, is_buffer, shader_visibility, context); - } - --static int compare_register_range(const void *a, const void *b) --{ -- const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; -- int ret; -- -- if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) -- return ret; -- -- if ((ret = vkd3d_u32_compare(range_a->register_space, range_b->register_space))) -- return ret; -- -- return vkd3d_u32_compare(range_a->base_register_idx, range_b->base_register_idx); --} -- - static int compare_descriptor_range(const void *a, const void *b) - { - const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; -@@ -983,25 +1110,6 @@ static int compare_descriptor_range(const void *a, const void *b) - return (range_a->descriptor_count == UINT_MAX) - 
(range_b->descriptor_count == UINT_MAX); - } - --static HRESULT validate_descriptor_register_ranges(const struct d3d12_root_descriptor_table_range *ranges, -- unsigned int count) --{ -- const struct d3d12_root_descriptor_table_range *range, *prev; -- unsigned int i; -- -- for (i = 1; i < count; ++i) -- { -- range = &ranges[i]; -- prev = &ranges[i - 1]; -- -- if (range->type == prev->type && range->register_space == prev->register_space -- && range->base_register_idx - prev->base_register_idx < prev->descriptor_count) -- return E_INVALIDARG; -- } -- -- return S_OK; --} -- - static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, - const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, - struct vkd3d_descriptor_set_context *context) -@@ -1062,10 +1170,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - offset += range->NumDescriptors; - } - -- qsort(table->ranges, range_count, sizeof(*table->ranges), compare_register_range); -- if (FAILED(hr = validate_descriptor_register_ranges(table->ranges, range_count))) -- return hr; -- - qsort(table->ranges, range_count, sizeof(*table->ranges), compare_descriptor_range); - - for (j = 0; j < range_count; ++j) -@@ -1130,9 +1234,10 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo - - cur_binding = context->current_binding; - -- vk_binding = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, -- shader_visibility, context); -+ shader_visibility, context, &vk_binding))) -+ return hr; - - /* Unroll descriptor range. 
*/ - for (k = 0; k < range->descriptor_count; ++k) -@@ -1175,6 +1280,7 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign - { - VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; - unsigned int i; -+ HRESULT hr; - - root_signature->push_descriptor_mask = 0; - -@@ -1188,10 +1294,11 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign - - root_signature->push_descriptor_mask |= 1u << i; - -- cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), - p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, -- vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context); -+ vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) -+ return hr; - cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); - cur_binding->descriptorCount = 1; - cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); -@@ -1215,7 +1322,7 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - unsigned int i; - HRESULT hr; - -- assert(root_signature->static_sampler_count == desc->NumStaticSamplers); -+ VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers); - for (i = 0; i < desc->NumStaticSamplers; ++i) - { - const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; -@@ -1223,9 +1330,10 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa - if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) - return hr; - -- cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, -+ if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, - VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, 
s->RegisterSpace, s->ShaderRegister, 1, false, false, -- vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context); -+ vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) -+ return hr; - cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; - cur_binding->descriptorCount = 1; - cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); -@@ -1600,7 +1708,7 @@ static HRESULT vkd3d_render_pass_cache_create_pass_locked(struct vkd3d_render_pa - - have_depth_stencil = key->depth_enable || key->stencil_enable; - rt_count = have_depth_stencil ? key->attachment_count - 1 : key->attachment_count; -- assert(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); -+ VKD3D_ASSERT(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); - - for (index = 0, attachment_index = 0; index < rt_count; ++index) - { -@@ -2140,7 +2248,7 @@ struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12Pipeline - { - if (!iface) - return NULL; -- assert(iface->lpVtbl == &d3d12_pipeline_state_vtbl); -+ VKD3D_ASSERT(iface->lpVtbl == &d3d12_pipeline_state_vtbl); - return impl_from_ID3D12PipelineState(iface); - } - -@@ -2296,7 +2404,7 @@ static HRESULT d3d12_pipeline_state_init_uav_counters(struct d3d12_pipeline_stat - unsigned int i, j; - HRESULT hr; - -- assert(vkd3d_popcount(stage_flags) == 1); -+ VKD3D_ASSERT(vkd3d_popcount(stage_flags) == 1); - - for (i = 0; i < shader_info->descriptor_count; ++i) - { -@@ -2911,7 +3019,7 @@ static HRESULT d3d12_graphics_pipeline_state_create_render_pass( - - if (dsv_format) - { -- assert(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask); -+ VKD3D_ASSERT(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask); - key.depth_enable = graphics->ds_desc.depthTestEnable; - key.stencil_enable = graphics->ds_desc.stencilTestEnable; - key.depth_stencil_write = graphics->ds_desc.depthWriteEnable -@@ -2928,7 +3036,7 @@ static HRESULT 
d3d12_graphics_pipeline_state_create_render_pass( - if (key.attachment_count != ARRAY_SIZE(key.vk_formats)) - key.vk_formats[ARRAY_SIZE(key.vk_formats) - 1] = VK_FORMAT_UNDEFINED; - for (i = key.attachment_count; i < ARRAY_SIZE(key.vk_formats); ++i) -- assert(key.vk_formats[i] == VK_FORMAT_UNDEFINED); -+ VKD3D_ASSERT(key.vk_formats[i] == VK_FORMAT_UNDEFINED); - - key.padding = 0; - key.sample_count = graphics->ms_desc.rasterizationSamples; -@@ -3476,7 +3584,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s - graphics->ms_desc.pSampleMask = NULL; - if (desc->sample_mask != ~0u) - { -- assert(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask)); -+ VKD3D_ASSERT(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask)); - graphics->sample_mask[0] = desc->sample_mask; - graphics->sample_mask[1] = 0xffffffffu; - graphics->ms_desc.pSampleMask = graphics->sample_mask; -@@ -3769,7 +3877,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta - .pDynamicStates = dynamic_states, - }; - -- assert(d3d12_pipeline_state_is_graphics(state)); -+ VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(state)); - - memset(&pipeline_key, 0, sizeof(pipeline_key)); - pipeline_key.topology = topology; -diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c -index 11029c9f5f9..831dc07af56 100644 ---- a/libs/vkd3d/libs/vkd3d/utils.c -+++ b/libs/vkd3d/libs/vkd3d/utils.c -@@ -331,7 +331,7 @@ static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device - - if (j >= current_list->format_count) - { -- assert(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT); -+ VKD3D_ASSERT(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT); - current_list->vk_formats[current_list->format_count++] = vk_format; - } - } -@@ -427,7 +427,7 @@ static const struct vkd3d_format *vkd3d_get_depth_stencil_format(const struct d3 - const struct vkd3d_format *formats; - 
unsigned int i; - -- assert(device); -+ VKD3D_ASSERT(device); - formats = device->depth_stencil_formats; - - for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i) -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -index c7431bd821b..9eccec111c7 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c -@@ -38,12 +38,12 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, - } - if (!create_info->instance && !create_info->instance_create_info) - { -- ERR("Instance or instance create info is required.\n"); -+ WARN("Instance or instance create info is required.\n"); - return E_INVALIDARG; - } - if (create_info->instance && create_info->instance_create_info) - { -- ERR("Instance and instance create info are mutually exclusive parameters.\n"); -+ WARN("Instance and instance create info are mutually exclusive parameters.\n"); - return E_INVALIDARG; - } - -@@ -153,7 +153,7 @@ static const D3D12_ROOT_SIGNATURE_DESC * STDMETHODCALLTYPE d3d12_root_signature_ - - TRACE("iface %p.\n", iface); - -- assert(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0); -+ VKD3D_ASSERT(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0); - return &deserializer->desc.d3d12.u.Desc_1_0; - } - -@@ -354,7 +354,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_Get - } - } - -- assert(deserializer->other_desc.d3d12.Version == version); -+ VKD3D_ASSERT(deserializer->other_desc.d3d12.Version == version); - *desc = &deserializer->other_desc.d3d12; - return S_OK; - } -diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -index d1fa866d9e3..ba4e2e8488d 100644 ---- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h -+++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h -@@ -37,7 +37,6 @@ - #include "vkd3d.h" - #include "vkd3d_shader.h" - --#include - #include - #include - #include -@@ -123,6 +122,7 @@ struct 
vkd3d_vulkan_info - bool KHR_image_format_list; - bool KHR_maintenance2; - bool KHR_maintenance3; -+ bool KHR_portability_subset; - bool KHR_push_descriptor; - bool KHR_sampler_mirror_clamp_to_edge; - bool KHR_timeline_semaphore; -@@ -145,6 +145,8 @@ struct vkd3d_vulkan_info - - bool rasterization_stream; - bool transform_feedback_queries; -+ bool geometry_shaders; -+ bool tessellation_shaders; - - bool uav_read_without_format; - -@@ -676,7 +678,7 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * - void *view; - - /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors -- * from multiple threads without syncronisation. This is apparently valid in Windows. */ -+ * from multiple threads without synchronisation. This is apparently valid in Windows. */ - for (;;) - { - do -@@ -784,8 +786,8 @@ extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; - static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( - VkDescriptorType type) - { -- assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); -- assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); -+ VKD3D_ASSERT(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); -+ VKD3D_ASSERT(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); - - return vk_descriptor_set_index_table[type]; - } -@@ -1229,7 +1231,7 @@ enum vkd3d_pipeline_bind_point - /* ID3D12CommandList */ - struct d3d12_command_list - { -- ID3D12GraphicsCommandList5 ID3D12GraphicsCommandList5_iface; -+ ID3D12GraphicsCommandList6 ID3D12GraphicsCommandList6_iface; - unsigned int refcount; - - D3D12_COMMAND_LIST_TYPE type; -@@ -1753,7 +1755,6 @@ static inline void vk_prepend_struct(void *header, void *structure) - { - VkBaseOutStructure *vk_header = header, *vk_structure = structure; - -- assert(!vk_structure->pNext); - vk_structure->pNext = vk_header->pNext; - vk_header->pNext = vk_structure; - } -@@ -1766,7 
+1767,7 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) - const void *next; - } *vkd3d_header = header, *vkd3d_structure = structure; - -- assert(!vkd3d_structure->next); -+ VKD3D_ASSERT(!vkd3d_structure->next); - vkd3d_structure->next = vkd3d_header->next; - vkd3d_header->next = vkd3d_structure; - } --- -2.45.2 - diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-eb71a1722a95894c0546a597b70469d7962.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-eb71a1722a95894c0546a597b70469d7962.patch deleted file mode 100644 index 294e3fd1..00000000 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-eb71a1722a95894c0546a597b70469d7962.patch +++ /dev/null @@ -1,80 +0,0 @@ -From d3bcd5849cdbc5a2f6139ab7acf840f9d1b41898 Mon Sep 17 00:00:00 2001 -From: Alistair Leslie-Hughes -Date: Thu, 29 Aug 2024 07:59:15 +1000 -Subject: [PATCH] Updated vkd3d to eb71a1722a95894c0546a597b70469d7962ba77a. - ---- - libs/vkd3d/include/vkd3d_shader.h | 29 +++++++++++++++++++++++++++-- - libs/vkd3d/libs/vkd3d-shader/ir.c | 2 +- - 2 files changed, 28 insertions(+), 3 deletions(-) - -diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h -index d37d8ebad9e..fc583b542ee 100644 ---- a/libs/vkd3d/include/vkd3d_shader.h -+++ b/libs/vkd3d/include/vkd3d_shader.h -@@ -583,7 +583,7 @@ enum vkd3d_shader_parameter_name - - /** - * The value of an immediate constant parameter, used in -- * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -+ * struct vkd3d_shader_parameter. - */ - struct vkd3d_shader_parameter_immediate_constant - { -@@ -604,6 +604,31 @@ struct vkd3d_shader_parameter_immediate_constant - } u; - }; - -+/** -+ * The value of an immediate constant parameter, used in -+ * struct vkd3d_shader_parameter1. -+ * -+ * \since 1.13 -+ */ -+struct vkd3d_shader_parameter_immediate_constant1 -+{ -+ union -+ { -+ /** -+ * The value if the parameter's data type is -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. 
-+ */ -+ uint32_t u32; -+ /** -+ * The value if the parameter's data type is -+ * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. -+ */ -+ float f32; -+ void *_pointer_pad; -+ uint32_t _pad[4]; -+ } u; -+}; -+ - /** - * The linkage of a specialization constant parameter, used in - * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. -@@ -689,7 +714,7 @@ struct vkd3d_shader_parameter1 - * Additional information if \a type is - * VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT. - */ -- struct vkd3d_shader_parameter_immediate_constant immediate_constant; -+ struct vkd3d_shader_parameter_immediate_constant1 immediate_constant; - /** - * Additional information if \a type is - * VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT. -diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c -index 6dbe30b1553..747238e2fee 100644 ---- a/libs/vkd3d/libs/vkd3d-shader/ir.c -+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c -@@ -45,7 +45,7 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil - - if (src->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - { -- dst->u.immediate_constant = src->u.immediate_constant; -+ dst->u.immediate_constant.u.u32 = src->u.immediate_constant.u.u32; - } - else if (src->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) - { --- -2.45.2 - diff --git a/staging/upstream-commit b/staging/upstream-commit index e5a4208c..b92ea674 100644 --- a/staging/upstream-commit +++ b/staging/upstream-commit @@ -1 +1 @@ -055bddab4f14a1f73e887b88b86408d654382c2b +8d2977ec12e234880199bc07daf49870d0aa64ec