From bdf5b5c756c711423757ec2173f1fefc8e26b6fb Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to c792114a6a58c7c97abf827d154d7ecd22d81536. --- libs/vkd3d/include/private/vkd3d_common.h | 6 +- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 25 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 616 ++++++++----- libs/vkd3d/libs/vkd3d-shader/dxil.c | 99 +- libs/vkd3d/libs/vkd3d-shader/fx.c | 536 +++++++++-- libs/vkd3d/libs/vkd3d-shader/glsl.c | 6 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 118 ++- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 99 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 7 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 858 ++++++++++++++---- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 689 ++++++++++++-- libs/vkd3d/libs/vkd3d-shader/ir.c | 175 ++-- libs/vkd3d/libs/vkd3d-shader/preproc.l | 1 + libs/vkd3d/libs/vkd3d-shader/spirv.c | 229 +++-- libs/vkd3d/libs/vkd3d-shader/tpf.c | 349 +++++-- .../libs/vkd3d-shader/vkd3d_shader_main.c | 30 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 13 +- libs/vkd3d/libs/vkd3d/command.c | 141 +-- libs/vkd3d/libs/vkd3d/device.c | 16 +- libs/vkd3d/libs/vkd3d/resource.c | 14 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 3 + 22 files changed, 3073 insertions(+), 958 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index a9d709d10fe..e7b25602ec0 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -52,6 +52,10 @@ ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) +#define VKD3D_EXPAND(x) x +#define VKD3D_STRINGIFY(x) #x +#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') @@ -233,7 +237,7 @@ static inline 
unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index f60ef7db769..c2c6ad67804 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -20,6 +20,7 @@ #define WIDL_C_INLINE_WRAPPERS #endif #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 9abc2c4db70..8a3eb5a367a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -254,6 +254,10 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_PHASE ] = "phase", [VKD3DSIH_PHI ] = "phi", [VKD3DSIH_POW ] = "pow", + [VKD3DSIH_QUAD_READ_ACROSS_D ] = "quad_read_across_d", + [VKD3DSIH_QUAD_READ_ACROSS_X ] = "quad_read_across_x", + [VKD3DSIH_QUAD_READ_ACROSS_Y ] = "quad_read_across_y", + [VKD3DSIH_QUAD_READ_LANE_AT ] = "quad_read_lane_at", [VKD3DSIH_RCP ] = "rcp", [VKD3DSIH_REP ] = "rep", [VKD3DSIH_RESINFO ] = "resinfo", @@ -1199,7 +1203,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const { bool untyped = false; - switch (compiler->current->handler_idx) + switch (compiler->current->opcode) { case VKD3DSIH_MOV: case VKD3DSIH_MOVC: @@ -1755,7 +1759,7 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile { struct vkd3d_string_buffer *buffer = &compiler->buffer; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_BREAKP: case VKD3DSIH_CONTINUEP: @@ -1853,8 +1857,13 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile break; case VKD3DSIH_TEX: - if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) - 
vkd3d_string_buffer_printf(buffer, "p"); + if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0)) + { + if (ins->flags & VKD3DSI_TEXLD_PROJECT) + vkd3d_string_buffer_printf(buffer, "p"); + else if (ins->flags & VKD3DSI_TEXLD_BIAS) + vkd3d_string_buffer_printf(buffer, "b"); + } break; case VKD3DSIH_WAVE_OP_ADD: @@ -1937,9 +1946,9 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, if (ins->coissue) vkd3d_string_buffer_printf(buffer, "+"); - shader_print_opcode(compiler, ins->handler_idx); + shader_print_opcode(compiler, ins->opcode); - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL: case VKD3DSIH_DCL_UAV_TYPED: @@ -2430,7 +2439,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_ELSE: case VKD3DSIH_ENDIF: @@ -2459,7 +2468,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, shader_dump_instruction(&compiler, ins); - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_ELSE: case VKD3DSIH_IF: diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index bfd5b52b436..4522d56c5c9 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1060,7 +1060,7 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { - if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) + if ((ins->opcode == VKD3DSIH_BREAKP || ins->opcode == VKD3DSIH_IF) && ins->flags) { vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, "Ignoring unexpected instruction flags %#x.", ins->flags); @@ -1142,23 +1142,23 @@ static void shader_sm1_read_instruction(struct 
vkd3d_shader_sm1_parser *sm1, str goto fail; } - if (ins->handler_idx == VKD3DSIH_DCL) + if (ins->opcode == VKD3DSIH_DCL) { shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); } - else if (ins->handler_idx == VKD3DSIH_DEF) + else if (ins->opcode == VKD3DSIH_DEF) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_FLOAT); shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } - else if (ins->handler_idx == VKD3DSIH_DEFB) + else if (ins->opcode == VKD3DSIH_DEFB) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_SCALAR, VKD3D_DATA_UINT); shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } - else if (ins->handler_idx == VKD3DSIH_DEFI) + else if (ins->opcode == VKD3DSIH_DEFI) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_INT); @@ -1195,7 +1195,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str return; fail: - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; *ptr = sm1->end; } @@ -1326,7 +1326,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c ins = &instructions->elements[instructions->count]; shader_sm1_read_instruction(&sm1, ins); - if (ins->handler_idx == VKD3DSIH_INVALID) + if (ins->opcode == VKD3DSIH_INVALID) { WARN("Encountered unrecognized or invalid instruction.\n"); vsir_program_cleanup(program); @@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c return ret; } -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, + 
unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) { unsigned int i; @@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem bool output; enum vkd3d_shader_type shader_type; unsigned int major_version; - D3DSHADER_PARAM_REGISTER_TYPE type; + enum vkd3d_shader_register_type type; unsigned int offset; } register_table[] = { - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, - {"fog", 
true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, + 
{"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, }; for (i = 0; i < ARRAY_SIZE(register_table); ++i) { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) && output == register_table[i].output - && ctx->profile->type == register_table[i].shader_type - && ctx->profile->major_version == register_table[i].major_version) + && version->type == register_table[i].shader_type + && version->major == register_table[i].major_version) { *type = register_table[i].type; - if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) + if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) *reg = register_table[i].offset; else - *reg = semantic->index; + *reg = semantic_index; return true; } } @@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem return false; } -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +bool hlsl_sm1_usage_from_semantic(const char *semantic_name, + uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) { static const struct { @@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU for (i = 0; i < ARRAY_SIZE(semantics); ++i) { - if (!ascii_strcasecmp(semantic->name, semantics[i].name)) + if (!ascii_strcasecmp(semantic_name, semantics[i].name)) { *usage = semantics[i].usage; - *usage_idx = semantic->index; + *usage_idx = semantic_index; return true; } } @@ -1465,6 +1466,17 @@ bool 
hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU return false; } +struct d3dbc_compiler +{ + struct vsir_program *program; + struct vkd3d_bytecode_buffer buffer; + struct vkd3d_shader_message_context *message_context; + + /* OBJECTIVE: Store all the required information in the other fields so + * that this hlsl_ctx is no longer necessary. */ + struct hlsl_ctx *ctx; +}; + static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) { if (type == VKD3D_SHADER_TYPE_VERTEX) @@ -1497,13 +1509,16 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) case HLSL_CLASS_TEXTURE: case HLSL_CLASS_VERTEX_SHADER: return D3DXPC_OBJECT; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_UAV: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: break; } @@ -1593,13 +1608,16 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) case HLSL_CLASS_VERTEX_SHADER: return D3DXPT_VERTEXSHADER; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_UAV: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: break; } @@ -1677,8 +1695,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) list_move_tail(&ctx->extern_vars, &sorted); } -static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - struct hlsl_ir_function_decl *entry_func) +void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) { size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; unsigned int uniform_count = 0; @@ -1739,11 +1756,11 @@ static void 
write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe } else { - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); put_u32(buffer, var->bind_count[r]); } put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ + put_u32(buffer, 0); /* default value */ } } @@ -1767,6 +1784,62 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe write_sm1_type(buffer, var->data_type, ctab_start); set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + + if (var->default_values) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + unsigned int k; + + default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); + set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); + + for (k = 0; k < comp_count; ++k) + { + struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); + unsigned int comp_offset; + enum hlsl_regset regset; + + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset); + if (regset == HLSL_REGSET_NUMERIC) + { + union + { + uint32_t u; + float f; + } uni; + + switch (comp_type->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &var->loc, "Write double default values."); + uni.u = 0; + break; + + case HLSL_TYPE_INT: + uni.f = var->default_values[k].value.i; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + uni.f = var->default_values[k].value.u; + break; + + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + uni.u = var->default_values[k].value.u; + break; + + default: + vkd3d_unreachable(); + } + + set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); + } + } + } + ++uniform_count; } } @@ -1778,7 +1851,7 @@
static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); } -static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) { return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); @@ -1791,7 +1864,7 @@ struct sm1_instruction struct sm1_dst_register { - D3DSHADER_PARAM_REGISTER_TYPE type; + enum vkd3d_shader_register_type type; D3DSHADER_PARAM_DSTMOD_TYPE mod; unsigned int writemask; uint32_t reg; @@ -1799,16 +1872,42 @@ struct sm1_instruction struct sm1_src_register { - D3DSHADER_PARAM_REGISTER_TYPE type; + enum vkd3d_shader_register_type type; D3DSHADER_PARAM_SRCMOD_TYPE mod; unsigned int swizzle; uint32_t reg; - } srcs[3]; + } srcs[4]; unsigned int src_count; unsigned int has_dst; }; +static bool is_inconsequential_instr(const struct sm1_instruction *instr) +{ + const struct sm1_src_register *src = &instr->srcs[0]; + const struct sm1_dst_register *dst = &instr->dst; + unsigned int i; + + if (instr->opcode != D3DSIO_MOV) + return false; + if (dst->mod != D3DSPDM_NONE) + return false; + if (src->mod != D3DSPSM_NONE) + return false; + if (src->type != dst->type) + return false; + if (src->reg != dst->reg) + return false; + + for (i = 0; i < 4; ++i) + { + if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) + return false; + } + + return true; +} + static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) { assert(reg->writemask); @@ -1821,15 +1920,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); } -static void write_sm1_instruction(struct hlsl_ctx *ctx, struct 
vkd3d_bytecode_buffer *buffer, - const struct sm1_instruction *instr) +static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; uint32_t token = instr->opcode; unsigned int i; + if (is_inconsequential_instr(instr)) + return; + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -1845,54 +1948,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); } -static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, - const struct hlsl_reg *src3) +static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) { struct sm1_instruction instr = { .opcode = D3DSIO_DP2ADD, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, + .srcs[2].type = VKD3DSPR_TEMP, .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), .srcs[2].reg = src3->id, .src_count = 3, }; - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void 
write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, - const struct hlsl_reg *src2, const struct hlsl_reg *src3) +static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, + .srcs[2].type = VKD3DSPR_TEMP, .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), .srcs[2].reg = src3->id, .src_count = 3, @@ -1901,26 +2003,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) +static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + 
.dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, .src_count = 2, @@ -1928,49 +2029,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) +static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, .src_count = 2, }; - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE 
dst_mod) +static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, + const struct hlsl_reg *dst, const struct hlsl_reg *src, + D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.mod = dst_mod, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), .srcs[0].reg = src->id, .srcs[0].mod = src_mod, @@ -1978,16 +2078,16 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe }; sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); const struct hlsl_ir_node *arg1 = expr->operands[0].node; const struct hlsl_type *dst_type = expr->node.data_type; const struct hlsl_type *src_type = arg1->data_type; + struct hlsl_ctx *ctx = d3dbc->ctx; /* Narrowing casts were already lowered. 
*/ assert(src_type->dimx == dst_type->dimx); @@ -2004,7 +2104,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b /* Integrals are internally represented as floats, so no change is necessary.*/ case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_TYPE_DOUBLE: @@ -2028,7 +2128,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b break; case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_TYPE_BOOL: @@ -2057,8 +2157,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } } -static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct hlsl_ctx *ctx = d3dbc->ctx; unsigned int i, x; for (i = 0; i < ctx->constant_defs.count; ++i) @@ -2067,12 +2170,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ uint32_t token = D3DSIO_DEF; const struct sm1_dst_register reg = { - .type = D3DSPR_CONST, + .type = VKD3DSPR_CONST, .writemask = VKD3DSP_WRITEMASK_ALL, .reg = constant_reg->index, }; - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= 5 << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -2082,32 +2185,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ } } -static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_var *var, bool output) +static void 
d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + const struct signature_element *element, bool output) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; struct sm1_dst_register reg = {0}; uint32_t token, usage_idx; D3DDECLUSAGE usage; bool ret; - if ((!output && !var->last_read) || (output && !var->first_write)) - return; - - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg)) + if (hlsl_sm1_register_from_semantic(version, element->semantic_name, + element->semantic_index, output, &reg.type, &reg.reg)) { usage = 0; usage_idx = 0; } else { - ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); + ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); assert(ret); - reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; - reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; + reg.type = output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg = element->register_index; } token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= 2 << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -2116,39 +2219,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; put_u32(buffer, token); - reg.writemask = (1 << var->data_type->dimx) - 1; + reg.writemask = element->mask; write_sm1_dst_register(buffer, &reg); } -static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) { + struct vsir_program *program = d3dbc->program; + const struct vkd3d_shader_version *version; bool write_in = false, write_out = false; - struct hlsl_ir_var *var; - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) + version = &program->shader_version; + if (version->type ==
VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) write_in = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) write_in = write_out = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) write_in = true; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + if (write_in) + { + for (unsigned int i = 0; i < program->input_signature.element_count; ++i) + d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); + } + + if (write_out) { - if (write_in && var->is_input_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, false); - if (write_out && var->is_output_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, true); + for (unsigned int i = 0; i < program->output_signature.element_count; ++i) + d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); } } -static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; struct sm1_dst_register reg = {0}; uint32_t token, res_type = 0; token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= 2 << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -2175,20 +2286,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); - reg.type = D3DSPR_SAMPLER; + reg.type = VKD3DSPR_COMBINED_SAMPLER; reg.writemask = VKD3DSP_WRITEMASK_ALL; reg.reg = reg_id; write_sm1_dst_register(buffer, &reg); }
-static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct hlsl_ctx *ctx = d3dbc->ctx; enum hlsl_sampler_dim sampler_dim; unsigned int i, count, reg_id; struct hlsl_ir_var *var; - if (ctx->profile->major_version < 2) + if (version->major < 2) return; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) @@ -2210,27 +2323,26 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b continue; } - reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; - write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); + reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; + d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); } } } } -static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_CONST, + .srcs[0].type = VKD3DSPR_CONST, .srcs[0].reg = constant->reg.id, .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), .src_count = 1, @@ -2239,10 +2351,10 @@ static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe assert(instr->reg.allocated); assert(constant->reg.allocated); sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +static void 
d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) { struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); @@ -2255,28 +2367,30 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); + d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); } } -static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); struct hlsl_ir_node *arg1 = expr->operands[0].node; struct hlsl_ir_node *arg2 = expr->operands[1].node; struct hlsl_ir_node *arg3 = expr->operands[2].node; + struct hlsl_ctx *ctx = d3dbc->ctx; assert(instr->reg.allocated); if (expr->op == HLSL_OP1_REINTERPRET) { - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); return; } if (expr->op == HLSL_OP1_CAST) { - write_sm1_cast(ctx, buffer, instr); + d3dbc_write_cast(d3dbc, instr); return; } @@ -2290,70 +2404,70 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b switch (expr->op) { case HLSL_OP1_ABS: - write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_OP1_DSX: - write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_OP1_DSY: - write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, 
&arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_OP1_EXP2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); break; case HLSL_OP1_LOG2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); break; case HLSL_OP1_NEG: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); break; case HLSL_OP1_SAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); break; case HLSL_OP1_RCP: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); break; case HLSL_OP1_RSQ: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); break; case HLSL_OP2_ADD: - write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_MAX: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_MIN: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_MUL: - write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP1_FRACT: - write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, 
D3DSPSM_NONE, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); break; case HLSL_OP2_DOT: switch (arg1->data_type->dimx) { case 4: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); break; case 3: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); break; default: @@ -2362,27 +2476,27 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b break; case HLSL_OP2_LOGIC_AND: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_LOGIC_OR: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_SLT: - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (version->type == VKD3D_SHADER_TYPE_PIXEL) hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); - write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP3_CMP: - if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + if (version->type == VKD3D_SHADER_TYPE_VERTEX) hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); - write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; case HLSL_OP3_DP2ADD: - write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; default: 
@@ -2391,10 +2505,9 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } } -static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block); +static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); -static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_if *iff = hlsl_ir_if(instr); const struct hlsl_ir_node *condition; @@ -2408,33 +2521,33 @@ static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buf .opcode = D3DSIO_IFC, .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. */ - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), .srcs[0].reg = condition->reg.id, .srcs[0].mod = 0, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), .srcs[1].reg = condition->reg.id, .srcs[1].mod = D3DSPSM_NEG, .src_count = 2, }; - write_sm1_instruction(ctx, buffer, &sm1_ifc); - write_sm1_block(ctx, buffer, &iff->then_block); + d3dbc_write_instruction(d3dbc, &sm1_ifc); + d3dbc_write_block(d3dbc, &iff->then_block); if (!list_empty(&iff->else_block.instrs)) { sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; - write_sm1_instruction(ctx, buffer, &sm1_else); - write_sm1_block(ctx, buffer, &iff->else_block); + d3dbc_write_instruction(d3dbc, &sm1_else); + d3dbc_write_block(d3dbc, &iff->else_block); } sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; - write_sm1_instruction(ctx, buffer, &sm1_endif); + d3dbc_write_instruction(d3dbc, &sm1_endif); } -static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) 
+static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); @@ -2448,35 +2561,36 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b { .opcode = D3DSIO_TEXKILL, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = reg->id, .dst.writemask = reg->writemask, .has_dst = 1, }; - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); break; } default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); } } -static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); + struct hlsl_ctx *ctx = d3dbc->ctx; const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = reg.id, .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), .src_count = 1, @@ -2487,15 +2601,15 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b if (load->src.var->is_uniform) { assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_CONST; + sm1_instr.srcs[0].type = VKD3DSPR_CONST; } else if (load->src.var->is_input_semantic) { - if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, - false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, + 
load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) { assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_INPUT; + sm1_instr.srcs[0].type = VKD3DSPR_INPUT; sm1_instr.srcs[0].reg = reg.id; } else @@ -2503,32 +2617,34 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); struct hlsl_ir_node *coords = load->coords.node; + struct hlsl_ir_node *ddx = load->ddx.node; + struct hlsl_ir_node *ddy = load->ddy.node; unsigned int sampler_offset, reg_id; + struct hlsl_ctx *ctx = d3dbc->ctx; struct sm1_instruction sm1_instr; sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); - reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; sm1_instr = (struct sm1_instruction) { - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = coords->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), - .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, .srcs[1].reg = reg_id, .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), @@ -2546,6 +2662,25 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ 
sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; break; + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + sm1_instr.opcode = D3DSIO_TEX; + sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + sm1_instr.opcode = D3DSIO_TEXLDD; + + sm1_instr.srcs[2].type = VKD3DSPR_TEMP; + sm1_instr.srcs[2].reg = ddx->reg.id; + sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); + + sm1_instr.srcs[3].type = VKD3DSPR_TEMP; + sm1_instr.srcs[3].reg = ddy->reg.id; + sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); + + sm1_instr.src_count += 2; + break; + default: hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); return; @@ -2553,25 +2688,26 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ assert(instr->reg.allocated); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; const struct hlsl_ir_store *store = hlsl_ir_store(instr); - const struct hlsl_ir_node *rhs = store->rhs.node; + struct hlsl_ctx *ctx = d3dbc->ctx; const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); + const struct hlsl_ir_node *rhs = store->rhs.node; struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = reg.id, .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = rhs->reg.id, .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), .src_count = 1, @@ -2585,16 +2721,16 @@ 
static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * if (store->lhs.var->is_output_semantic) { - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) + if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) { - sm1_instr.dst.type = D3DSPR_TEMP; + sm1_instr.dst.type = VKD3DSPR_TEMP; sm1_instr.dst.reg = 0; } - else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, - true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, + store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) { assert(reg.allocated); - sm1_instr.dst.type = D3DSPR_OUTPUT; + sm1_instr.dst.type = VKD3DSPR_OUTPUT; sm1_instr.dst.reg = reg.id; } else @@ -2604,11 +2740,10 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * assert(reg.allocated); sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); const struct hlsl_ir_node *val = swizzle->val.node; @@ -2616,12 +2751,12 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = val->reg.id, .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), swizzle->swizzle, instr->data_type->dimx), @@ -2631,12 +2766,12 @@ static void 
write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer assert(instr->reg.allocated); assert(val->reg.allocated); sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block) +static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) { + struct hlsl_ctx *ctx = d3dbc->ctx; const struct hlsl_ir_node *instr; LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) @@ -2656,38 +2791,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * vkd3d_unreachable(); case HLSL_IR_CONSTANT: - write_sm1_constant(ctx, buffer, instr); + d3dbc_write_constant(d3dbc, instr); break; case HLSL_IR_EXPR: - write_sm1_expr(ctx, buffer, instr); + d3dbc_write_expr(d3dbc, instr); break; case HLSL_IR_IF: if (hlsl_version_ge(ctx, 2, 1)) - write_sm1_if(ctx, buffer, instr); + d3dbc_write_if(d3dbc, instr); else hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); break; case HLSL_IR_JUMP: - write_sm1_jump(ctx, buffer, instr); + d3dbc_write_jump(d3dbc, instr); break; case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); + d3dbc_write_load(d3dbc, instr); break; case HLSL_IR_RESOURCE_LOAD: - write_sm1_resource_load(ctx, buffer, instr); + d3dbc_write_resource_load(d3dbc, instr); break; case HLSL_IR_STORE: - write_sm1_store(ctx, buffer, instr); + d3dbc_write_store(d3dbc, instr); break; case HLSL_IR_SWIZZLE: - write_sm1_swizzle(ctx, buffer, instr); + d3dbc_write_swizzle(d3dbc, instr); break; default: @@ -2696,32 +2831,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * } } -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +/* OBJECTIVE: Stop relying on ctx and entry_func on 
this function, receiving + * data from the other parameters instead, so it can be removed as an argument + * and be declared in vkd3d_shader_private.h and used without relying on HLSL + * IR structs. */ +int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct vkd3d_bytecode_buffer buffer = {0}; + const struct vkd3d_shader_version *version = &program->shader_version; + struct d3dbc_compiler d3dbc = {0}; + struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; + + d3dbc.ctx = ctx; + d3dbc.program = program; + d3dbc.message_context = message_context; - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - write_sm1_uniforms(ctx, &buffer, entry_func); + bytecode_put_bytes(buffer, ctab->code, ctab->size); - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_sampler_dcls(ctx, &buffer); - write_sm1_block(ctx, &buffer, &entry_func->body); + d3dbc_write_constant_defs(&d3dbc); + d3dbc_write_semantic_dcls(&d3dbc); + d3dbc_write_sampler_dcls(&d3dbc); + d3dbc_write_block(&d3dbc, &entry_func->body); - put_u32(&buffer, D3DSIO_END); + put_u32(buffer, D3DSIO_END); - if (buffer.status) - ctx->result = buffer.status; + if (buffer->status) + ctx->result = buffer->status; if (!ctx->result) { - out->code = buffer.data; - out->size = buffer.size; + out->code = buffer->data; + out->size = buffer->size; } else { - vkd3d_free(buffer.data); + vkd3d_free(buffer->data); } return ctx->result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 73a8d8687c5..2176debc7d2 100644 --- 
a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -458,6 +458,8 @@ enum dx_intrinsic_opcode DX_WAVE_ACTIVE_OP = 119, DX_WAVE_ACTIVE_BIT = 120, DX_WAVE_PREFIX_OP = 121, + DX_QUAD_READ_LANE_AT = 122, + DX_QUAD_OP = 123, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, DX_WAVE_ALL_BIT_COUNT = 135, @@ -576,6 +578,13 @@ enum dxil_wave_op_kind WAVE_OP_MAX = 3, }; +enum dxil_quad_op_kind +{ + QUAD_READ_ACROSS_X = 0, + QUAD_READ_ACROSS_Y = 1, + QUAD_READ_ACROSS_D = 2, +}; + struct sm6_pointer_info { const struct sm6_type *type; @@ -3755,21 +3764,21 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) for (i = 0; i < sm6->p.program->instructions.count; ++i) { ins = &sm6->p.program->instructions.elements[i]; - if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) + if (ins->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) { ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( (uintptr_t)ins->declaration.indexable_temp.initialiser, sm6); } - else if (ins->handler_idx == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) + else if (ins->opcode == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) { ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); } - else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) + else if (ins->opcode == VKD3DSIH_DCL_TGSM_RAW) { ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ins->flags = 0; } - else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) + else if (ins->opcode == VKD3DSIH_DCL_TGSM_STRUCTURED) { ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ins->flags = 0; @@ -4402,7 +4411,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); } - 
ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, @@ -4619,6 +4628,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co return VKD3DSIH_IMAX; case DX_IMIN: return VKD3DSIH_IMIN; + case DX_QUAD_READ_LANE_AT: + return VKD3DSIH_QUAD_READ_LANE_AT; case DX_UMAX: return VKD3DSIH_UMAX; case DX_UMIN: @@ -4962,7 +4973,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int reg->non_uniform = !!sm6_value_get_constant_uint(operands[3]); /* NOP is used to flag no instruction emitted. */ - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5370,6 +5381,47 @@ static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intr sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); } +static enum vkd3d_shader_opcode dx_map_quad_op(enum dxil_quad_op_kind op) +{ + switch (op) + { + case QUAD_READ_ACROSS_X: + return VKD3DSIH_QUAD_READ_ACROSS_X; + case QUAD_READ_ACROSS_Y: + return VKD3DSIH_QUAD_READ_ACROSS_Y; + case QUAD_READ_ACROSS_D: + return VKD3DSIH_QUAD_READ_ACROSS_D; + default: + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_opcode opcode; + enum dxil_quad_op_kind quad_op; + + quad_op = sm6_value_get_constant_uint(operands[1]); + if ((opcode = dx_map_quad_op(quad_op)) == VKD3DSIH_INVALID) + { + FIXME("Unhandled quad op kind %u.\n", quad_op); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, + "Quad op kind %u is 
unhandled.", quad_op); + return; + } + + vsir_instruction_init(ins, &sm6->p.location, opcode); + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -6229,6 +6281,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, + [DX_QUAD_OP ] = {"n", "Rc", sm6_parser_emit_dx_quad_op}, + [DX_QUAD_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -6381,7 +6435,7 @@ static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade { const struct sm6_type *type; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; if (!dst->type) return; @@ -6628,7 +6682,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor { *dst = *value; dst->type = type; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; return; } @@ -6739,7 +6793,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor * do not otherwise occur, so deleting these avoids the need for backend support. 
*/ if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) { - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; *dst = *a; return; } @@ -7039,7 +7093,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record reg->idx_count = 2; dst->structure_stride = src->structure_stride; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7189,7 +7243,7 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record incoming[j].block = sm6_function_get_block(function, record->operands[i + 1], sm6); } - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; qsort(incoming, phi->incoming_count, sizeof(*incoming), phi_incoming_compare); @@ -7224,7 +7278,7 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record code_block->terminator.type = TERMINATOR_RET; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7384,7 +7438,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec terminator->cases[i / 2u].value = sm6_value_get_constant_uint64(src); } - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7843,7 +7897,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const } ins = &code_block->instructions[code_block->instruction_count]; - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; dst = sm6_parser_get_current_value(sm6); fwd_type = dst->type; @@ -7922,7 +7976,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const if (sm6->p.failed) return VKD3D_ERROR; - assert(ins->handler_idx != VKD3DSIH_INVALID); if (record->attachment) 
metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); @@ -7933,9 +7986,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; } if (code_block) - code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; - else - assert(ins->handler_idx == VKD3DSIH_NOP); + code_block->instruction_count += ins->opcode != VKD3DSIH_NOP; if (dst->type && fwd_type && dst->type != fwd_type) { @@ -8735,7 +8786,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc if (!m) { - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ins->declaration.raw_resource.resource.reg.write_mask = 0; return &ins->declaration.raw_resource.resource; } @@ -8760,7 +8811,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc "A typed resource has no data type."); } - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; + ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; for (i = 0; i < VKD3D_VEC4_SIZE; ++i) ins->declaration.semantic.resource_data_type[i] = resource_values.data_type; ins->declaration.semantic.resource_type = resource_type; @@ -8770,14 +8821,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc } else if (kind == RESOURCE_KIND_RAWBUFFER) { - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ins->declaration.raw_resource.resource.reg.write_mask = 0; return &ins->declaration.raw_resource.resource; } else if (kind == RESOURCE_KIND_STRUCTUREDBUFFER) { - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; + ins->opcode = is_uav ? 
VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; ins->declaration.structured_resource.byte_stride = resource_values.byte_stride; ins->declaration.structured_resource.resource.reg.write_mask = 0; @@ -8858,7 +8909,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, d->kind = kind; d->reg_type = VKD3DSPR_RESOURCE; d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; - d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL) + d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; init_resource_declaration(resource, VKD3DSPR_RESOURCE, d->reg_data_type, d->id, &d->range); @@ -8932,7 +8983,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, d->kind = values[0]; d->reg_type = VKD3DSPR_UAV; d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; - d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL_UAV_TYPED) + d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; init_resource_declaration(resource, VKD3DSPR_UAV, d->reg_data_type, d->id, &d->range); diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 57b4ac24212..bd2ad1290cd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -56,6 +56,70 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) vkd3d_free(string_entry); } +struct state_block_function_info +{ + const char *name; + unsigned int min_args, max_args; +}; + +static const struct state_block_function_info *get_state_block_function_info(const char *name) +{ + static const struct state_block_function_info valid_functions[] = + { + {"SetBlendState", 3, 3}, + {"SetDepthStencilState", 2, 2}, + {"SetRasterizerState", 1, 1}, + {"SetVertexShader", 1, 1}, + {"SetDomainShader", 1, 1}, + {"SetHullShader", 1, 1}, + {"SetGeometryShader", 1, 1}, + {"SetPixelShader", 1, 1}, + {"SetComputeShader", 1, 1}, + {"OMSetRenderTargets", 2, 9}, + }; + + for (unsigned int i = 0; i < ARRAY_SIZE(valid_functions); ++i) + { + if (!strcmp(name, valid_functions[i].name)) + return &valid_functions[i]; + } + return NULL; +} + +bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, + const struct vkd3d_shader_location *loc) +{ + if (entry->is_function_call) + { + const struct state_block_function_info *info = get_state_block_function_info(entry->name); + + if (!info) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, + "Invalid state block function '%s'.", entry->name); + return false; + } + if (entry->args_count < info->min_args || entry->args_count > info->max_args) + { + if (info->min_args == info->max_args) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, + "Invalid argument count for state block function '%s' (expected %u).", + entry->name, info->min_args); + } + else + { + 
hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, + "Invalid argument count for state block function '%s' (expected from %u to %u).", + entry->name, info->min_args, info->max_args); + } + return false; + } + } + + return true; +} + struct fx_write_context; struct fx_write_context_ops @@ -63,6 +127,7 @@ struct fx_write_context_ops uint32_t (*write_string)(const char *string, struct fx_write_context *fx); void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); bool are_child_effects_supported; }; @@ -94,6 +159,8 @@ struct fx_write_context uint32_t texture_count; uint32_t uav_count; uint32_t sampler_state_count; + uint32_t depth_stencil_state_count; + uint32_t rasterizer_state_count; int status; bool child_effect; @@ -128,8 +195,41 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) fx->ops->write_pass(var, fx); } +static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) +{ + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_var *v; + uint32_t count = 0; + + if (!scope) + return 0; + + LIST_FOR_EACH_ENTRY(v, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (!v->default_values) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Annotation variable is missing default value."); + + fx->ops->write_annotation(v, fx); + ++count; + } + + return count; +} + +static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t count_offset, count; + + count_offset = put_u32(buffer, 0); + count = write_annotations(scope, fx); + set_u32(buffer, count_offset, count); +} + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); static const char * get_fx_4_type_name(const 
struct hlsl_type *type); +static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { @@ -279,9 +379,9 @@ static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx name_offset = write_string(var->name, fx); put_u32(buffer, name_offset); put_u32(buffer, 0); /* Assignment count. */ - put_u32(buffer, 0); /* Annotation count. */ - /* TODO: annotations */ + write_fx_4_annotations(var->annotations, fx); + /* TODO: assignments */ } @@ -402,6 +502,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_UAV: return uav_type_names[type->sampler_dim]; + case HLSL_CLASS_DEPTH_STENCIL_STATE: + return "DepthStencilState"; + case HLSL_CLASS_DEPTH_STENCIL_VIEW: return "DepthStencilView"; @@ -421,10 +524,20 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) { + struct field_offsets + { + uint32_t name; + uint32_t semantic; + uint32_t offset; + uint32_t type; + }; + uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - uint32_t name_offset, offset, size, stride, numeric_desc; + struct field_offsets *field_offsets = NULL; + struct hlsl_ctx *ctx = fx->ctx; uint32_t elements_count = 0; const char *name; + size_t i; /* Resolve arrays to element type and number of elements. 
*/ if (type->class == HLSL_CLASS_ARRAY) @@ -436,6 +549,22 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co name = get_fx_4_type_name(type); name_offset = write_string(name, fx); + if (type->class == HLSL_CLASS_STRUCT) + { + if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) + return 0; + + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + + field_offsets[i].name = write_string(field->name, fx); + field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); + field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; + field_offsets[i].type = write_type(field->type, fx); + } + } + offset = put_u32_unaligned(buffer, name_offset); switch (type->class) @@ -446,8 +575,10 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, 1); break; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_TEXTURE: @@ -464,6 +595,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: vkd3d_unreachable(); case HLSL_CLASS_STRING: @@ -473,34 +605,40 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co return 0; } - size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + /* Structures can only contain numeric fields, this is validated during variable declaration. 
*/ + total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + packed_size = 0; + if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) + packed_size = hlsl_type_component_count(type) * sizeof(float); if (elements_count) - size *= elements_count; + { + total_size *= elements_count; + packed_size *= elements_count; + } stride = align(stride, 4 * sizeof(float)); put_u32_unaligned(buffer, elements_count); - put_u32_unaligned(buffer, size); /* Total size. */ - put_u32_unaligned(buffer, stride); /* Stride. */ - put_u32_unaligned(buffer, size); + put_u32_unaligned(buffer, total_size); + put_u32_unaligned(buffer, stride); + put_u32_unaligned(buffer, packed_size); if (type->class == HLSL_CLASS_STRUCT) { - size_t i; - put_u32_unaligned(buffer, type->e.record.field_count); for (i = 0; i < type->e.record.field_count; ++i) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - uint32_t semantic_offset, field_type_offset; + const struct field_offsets *field = &field_offsets[i]; - name_offset = write_string(field->name, fx); - semantic_offset = write_string(field->semantic.name, fx); - field_type_offset = write_type(field->type, fx); + put_u32_unaligned(buffer, field->name); + put_u32_unaligned(buffer, field->semantic); + put_u32_unaligned(buffer, field->offset); + put_u32_unaligned(buffer, field->type); + } - put_u32_unaligned(buffer, name_offset); - put_u32_unaligned(buffer, semantic_offset); - put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); - put_u32_unaligned(buffer, field_type_offset); + if (ctx->profile->major_version == 5) + { + put_u32_unaligned(buffer, 0); /* Base class type */ + put_u32_unaligned(buffer, 0); /* Interface count */ } } else if (type->class == HLSL_CLASS_TEXTURE) @@ -556,6 +694,14 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co { put_u32_unaligned(buffer, 6); } + else if (type->class == HLSL_CLASS_RASTERIZER_STATE) + { + put_u32_unaligned(buffer, 4); 
+ } + else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) + { + put_u32_unaligned(buffer, 3); + } else if (hlsl_is_numeric_type(type)) { numeric_desc = get_fx_4_numeric_type_description(type, fx); @@ -565,9 +711,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co { FIXME("Type %u is not supported.\n", type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); - return 0; } + vkd3d_free(field_offsets); return offset; } @@ -581,8 +727,9 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex name_offset = write_string(var->name, fx); put_u32(buffer, name_offset); count_offset = put_u32(buffer, 0); - put_u32(buffer, 0); /* Annotation count. */ + write_fx_4_annotations(var->annotations, fx); + count = 0; LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) { write_pass(pass, fx); @@ -617,7 +764,7 @@ static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) put_u32(buffer, name_offset); count_offset = put_u32(buffer, 0); /* Technique count */ - put_u32(buffer, 0); /* Annotation count */ + write_fx_4_annotations(var ? var->annotations : NULL, fx); count = fx->technique_count; write_techniques(var ? var->scope : fx->ctx->globals, fx); @@ -683,7 +830,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n } name_offset = write_string(name, fx); - semantic_offset = write_string(semantic->name, fx); + semantic_offset = semantic->raw_name ? 
write_string(semantic->raw_name, fx) : 0; offset = put_u32(buffer, hlsl_sm1_base_type(type)); put_u32(buffer, hlsl_sm1_class(type)); @@ -794,6 +941,9 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f case HLSL_CLASS_MATRIX: case HLSL_CLASS_STRUCT: /* FIXME: write actual initial value */ + if (var->default_values) + hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); + offset = put_u32(buffer, 0); for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) @@ -850,8 +1000,10 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); return false; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_UAV: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_VOID: return false; @@ -859,6 +1011,7 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: /* This cannot appear as an extern variable. 
*/ break; } @@ -972,9 +1125,72 @@ static const struct fx_write_context_ops fx_4_ops = .write_string = write_fx_4_string, .write_technique = write_fx_4_technique, .write_pass = write_fx_4_pass, + .write_annotation = write_fx_4_annotation, .are_child_effects_supported = true, }; +static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, + struct fx_write_context *fx) +{ + const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); + uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + struct hlsl_ctx *ctx = fx->ctx; + uint32_t offset = buffer->size; + unsigned int comp_count; + + if (!value) + return 0; + + comp_count = hlsl_type_component_count(type); + + for (i = 0; i < elements_count; ++i) + { + switch (type->class) + { + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + { + switch (type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + + for (j = 0; j < comp_count; ++j) + { + put_u32_unaligned(buffer, value->value.u); + value++; + } + break; + default: + hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", + type->e.numeric.type); + } + + break; + } + case HLSL_CLASS_STRUCT: + { + struct hlsl_struct_field *fields = type->e.record.fields; + + for (j = 0; j < type->e.record.field_count; ++j) /* j walks the fields; i is the array element index. */ + { + write_fx_4_default_value(fields[j].type, value, fx); + value += hlsl_type_component_count(fields[j].type); + } + break; + } + default: + hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); + } + } + + return offset; +} + static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; @@ -984,22 +1200,20 @@ static void
write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st { HAS_EXPLICIT_BIND_POINT = 0x4, }; - struct hlsl_ctx *ctx = fx->ctx; - /* Explicit bind point. */ - if (var->reg_reservation.reg_type) + if (var->has_explicit_bind_point) flags |= HAS_EXPLICIT_BIND_POINT; type_offset = write_type(var->data_type, fx); name_offset = write_string(var->name, fx); - semantic_offset = write_string(var->semantic.name, fx); + semantic_offset = write_string(var->semantic.raw_name, fx); put_u32(buffer, name_offset); put_u32(buffer, type_offset); semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ - put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ - value_offset = put_u32(buffer, 0); /* Default value offset */ + put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. */ + value_offset = put_u32(buffer, 0); put_u32(buffer, flags); /* Flags */ if (shared) @@ -1008,17 +1222,39 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st } else { - /* FIXME: write default value */ - set_u32(buffer, value_offset, 0); + uint32_t offset = write_fx_4_default_value(var->data_type, var->default_values, fx); + set_u32(buffer, value_offset, offset); - put_u32(buffer, 0); /* Annotations count */ - if (has_annotations(var)) - hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); + write_fx_4_annotations(var->annotations, fx); fx->numeric_variable_count++; } } +static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t name_offset, type_offset, offset; + struct hlsl_ctx *ctx = fx->ctx; + + name_offset = write_string(var->name, fx); + type_offset = write_type(var->data_type, fx); + + put_u32(buffer, name_offset); + put_u32(buffer, type_offset); + + if 
(hlsl_is_numeric_type(type)) + { + offset = write_fx_4_default_value(var->data_type, var->default_values, fx); + put_u32(buffer, offset); + } + else + { + hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); + } +} + struct rhs_named_value { const char *name; @@ -1118,6 +1354,9 @@ static bool state_block_contains_state(const char *name, unsigned int start, str for (i = start; i < block->count; ++i) { + if (block->entries[i]->is_function_call) + continue; + if (!ascii_strcasecmp(block->entries[i]->name, name)) return true; } @@ -1160,6 +1399,41 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no return true; } +static void fold_state_value(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry) +{ + bool progress; + + do + { + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); + progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); + } while (progress); +} + +enum state_property_component_type +{ + FX_BOOL, + FX_FLOAT, + FX_UINT, + FX_UINT8, +}; + +static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) +{ + switch (type) + { + case FX_BOOL: + return HLSL_TYPE_BOOL; + case FX_FLOAT: + return HLSL_TYPE_FLOAT; + case FX_UINT: + case FX_UINT8: + return HLSL_TYPE_UINT; + default: + vkd3d_unreachable(); + } +} + static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, struct fx_write_context *fx) { @@ -1209,37 +1483,112 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl { NULL } }; + static const struct rhs_named_value depth_write_mask_values[] = + { + { "ZERO", 0 }, + { "ALL", 1 }, + { NULL } + }; + + static const struct rhs_named_value comparison_values[] = + { + { "NEVER", 1 }, + { "LESS", 2 }, + { "EQUAL", 3 }, + { "LESS_EQUAL", 4 }, + { "GREATER", 5 }, + { "NOT_EQUAL", 6 }, + { "GREATER_EQUAL", 7 }, + { "ALWAYS", 8 }, + 
{ NULL } + }; + + static const struct rhs_named_value stencil_op_values[] = + { + { "KEEP", 1 }, + { "ZERO", 2 }, + { "REPLACE", 3 }, + { "INCR_SAT", 4 }, + { "DECR_SAT", 5 }, + { "INVERT", 6 }, + { "INCR", 7 }, + { "DECR", 8 }, + { NULL } + }; + + static const struct rhs_named_value fill_values[] = + { + { "WIREFRAME", 2 }, + { "SOLID", 3 }, + { NULL } + }; + + static const struct rhs_named_value cull_values[] = + { + { "NONE", 1 }, + { "FRONT", 2 }, + { "BACK", 3 }, + { NULL } + }; + static const struct state { const char *name; enum hlsl_type_class container; - enum hlsl_base_type type; + enum hlsl_type_class class; + enum state_property_component_type type; unsigned int dimx; uint32_t id; const struct rhs_named_value *values; } states[] = { - { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, - { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, - { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, - { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, - { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, - { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, - { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, - { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, - { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, - { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, + { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 12, fill_values }, + { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 13, cull_values }, + { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 14 }, + { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 15 }, + { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 16 }, + { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, 
FX_FLOAT, 1, 17 }, + { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 18 }, + { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 19 }, + { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 20 }, + { "AntialiasedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 21 }, + + { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 22 }, + { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 23, depth_write_mask_values }, + { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 24, comparison_values }, + { "StencilEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 25 }, + { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 26 }, + { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 27 }, + { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 28, stencil_op_values }, + { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 29, stencil_op_values }, + { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 30, stencil_op_values }, + { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 31, comparison_values }, + { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 32, stencil_op_values }, + { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 33, stencil_op_values }, + { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 34, stencil_op_values }, + { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 35, comparison_values }, + + { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 45, filter_values }, + { 
"AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 46, address_values }, + { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 47, address_values }, + { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 48, address_values }, + { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 49 }, + { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 50 }, + { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 51, compare_func_values }, + { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 52 }, + { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 53 }, + { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 54 }, /* TODO: "Texture" field */ }; const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); struct replace_state_context replace_context; + struct hlsl_type *state_type = NULL; struct hlsl_ir_node *node, *cast; const struct state *state = NULL; struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_type *state_type; + enum hlsl_base_type base_type; unsigned int i; - bool progress; for (i = 0; i < ARRAY_SIZE(states); ++i) { @@ -1269,28 +1618,54 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl replace_context.values = state->values; replace_context.var = var; - /* Turned named constants to actual constants. */ + /* Turn named constants to actual constants. */ hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); + fold_state_value(ctx, entry); - if (state->dimx) - state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); - else - state_type = hlsl_get_scalar_type(ctx, state->type); - - /* Cast to expected property type. */ - node = entry->args->node; - if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) - return; - list_add_after(&node->entry, &cast->entry); + /* Now cast and run folding again. 
*/ - hlsl_src_remove(entry->args); - hlsl_src_from_node(entry->args, cast); + base_type = hlsl_type_from_fx_type(state->type); + switch (state->class) + { + case HLSL_CLASS_VECTOR: + state_type = hlsl_get_vector_type(ctx, base_type, state->dimx); + break; + case HLSL_CLASS_SCALAR: + state_type = hlsl_get_scalar_type(ctx, base_type); + break; + case HLSL_CLASS_TEXTURE: + hlsl_fixme(ctx, &ctx->location, "Object type fields are not supported."); + break; + default: + ; + } - do + if (state_type) { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); - progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); - } while (progress); + node = entry->args->node; + if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) + return; + list_add_after(&node->entry, &cast->entry); + + /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. */ + if (state->type == FX_UINT8) + { + struct hlsl_ir_node *mask; + + if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) + return; + list_add_after(&cast->entry, &mask->entry); + + if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) + return; + list_add_after(&mask->entry, &cast->entry); + } + + hlsl_src_remove(entry->args); + hlsl_src_from_node(entry->args, cast); + + fold_state_value(ctx, entry); + } } static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) @@ -1344,7 +1719,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ type_offset = write_type(var->data_type, fx); name_offset = write_string(var->name, fx); - semantic_offset = write_string(var->semantic.name, fx); + semantic_offset = write_string(var->semantic.raw_name, fx); put_u32(buffer, name_offset); put_u32(buffer, type_offset); @@ -1383,19 +1758,27 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ fx->dsv_count += elements_count; break; + case 
HLSL_CLASS_DEPTH_STENCIL_STATE: + write_fx_4_state_object_initializer(var, fx); + fx->depth_stencil_state_count += elements_count; + break; + case HLSL_CLASS_SAMPLER: write_fx_4_state_object_initializer(var, fx); fx->sampler_state_count += elements_count; break; + case HLSL_CLASS_RASTERIZER_STATE: + write_fx_4_state_object_initializer(var, fx); + fx->rasterizer_state_count += elements_count; + break; + default: hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", type->e.numeric.type); } - put_u32(buffer, 0); /* Annotations count */ - if (has_annotations(var)) - hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); + write_fx_4_annotations(var->annotations, fx); ++fx->object_variable_count; } @@ -1438,9 +1821,7 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx } else { - put_u32(buffer, 0); /* Annotations count */ - if (b->annotations) - hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); + write_fx_4_annotations(b->annotations, fx); ++fx->buffer_count; } @@ -1464,6 +1845,9 @@ static void write_buffers(struct fx_write_context *fx, bool shared) { struct hlsl_buffer *buffer; + if (shared && !fx->child_effect) + return; + LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) { if (!buffer->size && !fx->include_empty_buffers) @@ -1483,8 +1867,10 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc switch (type->class) { + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_TEXTURE: @@ -1551,9 +1937,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) size_offset = put_u32(&buffer, 0); /* Unstructured size. */ put_u32(&buffer, 0); /* String count. 
*/ put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ + put_u32(&buffer, fx.depth_stencil_state_count); put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. */ + put_u32(&buffer, fx.rasterizer_state_count); put_u32(&buffer, fx.sampler_state_count); put_u32(&buffer, fx.rtv_count); put_u32(&buffer, fx.dsv_count); @@ -1609,9 +1995,9 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) size_offset = put_u32(&buffer, 0); /* Unstructured size. */ put_u32(&buffer, 0); /* String count. */ put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ + put_u32(&buffer, fx.depth_stencil_state_count); put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. */ + put_u32(&buffer, fx.rasterizer_state_count); put_u32(&buffer, fx.sampler_state_count); put_u32(&buffer, fx.rtv_count); put_u32(&buffer, fx.dsv_count); diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index 3e482a5fc70..8725724a239 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -48,9 +48,9 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { shader_glsl_print_indent(&gen->buffer, gen->indent); - vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); + vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->opcode); vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); + "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, @@ -74,7 +74,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator { 
generator->location = instruction->location; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_OUTPUT: diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 99214fba6de..acf50869a40 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -167,6 +167,8 @@ void hlsl_free_var(struct hlsl_ir_var *decl) for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) vkd3d_free((void *)decl->objects_usage[k]); + vkd3d_free(decl->default_values); + for (i = 0; i < decl->state_block_count; ++i) hlsl_free_state_block(decl->state_blocks[i]); vkd3d_free(decl->state_blocks); @@ -367,15 +369,18 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type type->reg_size[HLSL_REGSET_UAVS] = 1; break; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_STRING: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VERTEX_SHADER: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: break; } } @@ -435,11 +440,13 @@ static bool type_is_single_component(const struct hlsl_type *type) { switch (type->class) { + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_SCALAR: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_TEXTURE: case HLSL_CLASS_UAV: @@ -450,6 +457,7 @@ static bool type_is_single_component(const struct hlsl_type *type) case HLSL_CLASS_MATRIX: case HLSL_CLASS_STRUCT: case HLSL_CLASS_ARRAY: + case HLSL_CLASS_CONSTANT_BUFFER: return false; case HLSL_CLASS_EFFECT_GROUP: @@ -528,6 +536,12 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, vkd3d_unreachable(); } + case 
HLSL_CLASS_CONSTANT_BUFFER: + { + *type_ptr = type->e.resource.format; + return traverse_path_from_component_index(ctx, type_ptr, index_ptr); + } + default: vkd3d_unreachable(); } @@ -556,12 +570,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty switch (type->class) { - case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: offset[HLSL_REGSET_NUMERIC] += idx; break; + case HLSL_CLASS_MATRIX: + offset[HLSL_REGSET_NUMERIC] += 4 * idx; + break; + case HLSL_CLASS_STRUCT: for (r = 0; r <= HLSL_REGSET_LAST; ++r) offset[r] += type->e.record.fields[idx].reg_offset[r]; @@ -577,8 +593,10 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty } break; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: @@ -592,6 +610,8 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_CONSTANT_BUFFER: vkd3d_unreachable(); } type = next_type; @@ -865,6 +885,20 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim return type; } +struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_CONSTANT_BUFFER; + type->dimy = 1; + type->e.resource.format = format; + hlsl_type_calculate_reg_size(ctx, type); + list_add_tail(&ctx->types, &type->entry); + return type; +} + static const char * get_case_insensitive_typename(const char *name) { static const char *const names[] = @@ -956,8 +990,13 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_ARRAY: return 
hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; + case HLSL_CLASS_CONSTANT_BUFFER: + return hlsl_type_component_count(type->e.resource.format); + + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: @@ -1038,10 +1077,15 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 case HLSL_CLASS_TECHNIQUE: return t1->e.version == t2->e.version; + case HLSL_CLASS_CONSTANT_BUFFER: + return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); + + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_STRING: case HLSL_CLASS_VERTEX_SHADER: @@ -1247,6 +1291,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const cha list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); else list_add_tail(&ctx->globals->vars, &var->scope_entry); + var->is_synthetic = true; } return var; } @@ -1765,7 +1810,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type } struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, const struct vkd3d_shader_location *loc) + struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop; @@ -1774,6 +1820,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block); + + loop->unroll_type = unroll_type; + loop->unroll_limit = unroll_limit; return &loop->node; } @@ -1836,9 +1885,7 @@ static struct hlsl_ir_node *map_instr(const 
struct clone_instr_map *map, struct return map->instrs[i].dst; } - /* The block passed to hlsl_clone_block() should have been free of external - * references. */ - vkd3d_unreachable(); + return src; } static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, @@ -1935,7 +1982,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ if (!clone_block(ctx, &body, &src->body, map)) return NULL; - if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) + if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) { hlsl_block_cleanup(&body); return NULL; @@ -2407,10 +2454,21 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } return string; + case HLSL_CLASS_CONSTANT_BUFFER: + vkd3d_string_buffer_printf(string, "ConstantBuffer"); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } + return string; + + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: @@ -2735,6 +2793,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) static const char *const op_names[] = { [HLSL_OP0_VOID] = "void", + [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", [HLSL_OP1_ABS] = "abs", [HLSL_OP1_BIT_NOT] = "~", @@ -3086,6 +3145,33 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl vkd3d_string_buffer_cleanup(&buffer); } +void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) +{ + unsigned int k, component_count = hlsl_type_component_count(var->data_type); + struct vkd3d_string_buffer buffer; + + vkd3d_string_buffer_init(&buffer); + if 
(!var->default_values) + { + vkd3d_string_buffer_printf(&buffer, "var \"%s\" has no default values.\n", var->name); + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); + return; + } + + vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); + for (k = 0; k < component_count; ++k) + { + if (k % 4 == 0) + vkd3d_string_buffer_printf(&buffer, "\n "); + vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].value.u); + } + vkd3d_string_buffer_printf(&buffer, "\n"); + + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); +} + void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) { struct hlsl_src *src, *next; @@ -3319,9 +3405,25 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) void hlsl_cleanup_semantic(struct hlsl_semantic *semantic) { vkd3d_free((void *)semantic->name); + vkd3d_free((void *)semantic->raw_name); memset(semantic, 0, sizeof(*semantic)); } +bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src) +{ + *dst = *src; + dst->name = dst->raw_name = NULL; + if (src->name && !(dst->name = hlsl_strdup(ctx, src->name))) + return false; + if (src->raw_name && !(dst->raw_name = hlsl_strdup(ctx, src->raw_name))) + { + hlsl_cleanup_semantic(dst); + return false; + } + + return true; +} + static void free_function_decl(struct hlsl_ir_function_decl *decl) { unsigned int i; @@ -3712,9 +3814,11 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); hlsl_scope_add_type(ctx->globals, 
hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 27814f3a56f..5832958712a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -78,10 +78,12 @@ enum hlsl_type_class HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, HLSL_CLASS_STRUCT, HLSL_CLASS_ARRAY, + HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_DEPTH_STENCIL_VIEW, HLSL_CLASS_EFFECT_GROUP, HLSL_CLASS_PASS, HLSL_CLASS_PIXEL_SHADER, + HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_RENDER_TARGET_VIEW, HLSL_CLASS_SAMPLER, HLSL_CLASS_STRING, @@ -89,6 +91,7 @@ enum hlsl_type_class HLSL_CLASS_TEXTURE, HLSL_CLASS_UAV, HLSL_CLASS_VERTEX_SHADER, + HLSL_CLASS_CONSTANT_BUFFER, HLSL_CLASS_VOID, }; @@ -222,6 +225,8 @@ struct hlsl_semantic const char *name; uint32_t index; + /* Name exactly as it appears in the sources. */ + const char *raw_name; /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ bool reported_missing; /* In case the variable or field that stores this semantic has already reported to use a @@ -259,8 +264,20 @@ struct hlsl_struct_field * struct. */ struct hlsl_reg { - /* Index of the first register allocated. */ + /* Register number of the first register allocated. */ uint32_t id; + /* For descriptors (buffer, texture, sampler, UAV) this is the base binding + * index of the descriptor. 
+ * For 5.1 and above descriptors have space and may be arrayed, in which + * case the array shares a single register ID but has a range of register + * indices, and "id" and "index" are as a rule not equal. + * For versions below 5.1, the register number for descriptors is the same + * as its external binding index, so only "index" is used, and "id" is + * ignored. + * For numeric registers "index" is not used. */ + uint32_t index; + /* Register space of a descriptor. Not used for numeric registers. */ + uint32_t space; /* Number of registers to be allocated. * Unlike the variable's type's regsize, it is not expressed in register components, but rather * in whole registers, and may depend on which components are used within the shader. */ @@ -371,6 +388,7 @@ struct hlsl_attribute #define HLSL_STORAGE_LINEAR 0x00010000 #define HLSL_MODIFIER_SINGLE 0x00020000 #define HLSL_MODIFIER_EXPORT 0x00040000 +#define HLSL_STORAGE_ANNOTATION 0x00080000 #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ @@ -396,6 +414,14 @@ struct hlsl_reg_reservation unsigned int offset_index; }; +union hlsl_constant_value_component +{ + uint32_t u; + int32_t i; + float f; + double d; +}; + struct hlsl_ir_var { struct hlsl_type *data_type; @@ -418,6 +444,15 @@ struct hlsl_ir_var /* Scope that contains annotations for this variable. */ struct hlsl_scope *annotations; + /* Array of default values the variable was initialized with, one for each component. + * Only for variables that need it, such as uniforms and variables inside constant buffers. + * This pointer is NULL for others. */ + struct hlsl_default_value + { + /* Default value, in case the component is a numeric value. */ + union hlsl_constant_value_component value; + } *default_values; + /* A dynamic array containing the state block on the variable's declaration, if any. * An array variable may contain multiple state blocks. 
* A technique pass will always contain one. @@ -460,6 +495,8 @@ struct hlsl_ir_var uint32_t is_uniform : 1; uint32_t is_param : 1; uint32_t is_separated_resource : 1; + uint32_t is_synthetic : 1; + uint32_t has_explicit_bind_point : 1; }; /* This struct is used to represent assignments in state block entries: @@ -470,22 +507,31 @@ struct hlsl_ir_var * name[lhs_index] = args[0] * - or - * name[lhs_index] = {args[0], args[1], ...}; + * + * This struct also represents function call syntax: + * name(args[0], args[1], ...) */ struct hlsl_state_block_entry { - /* For assignments, the name in the lhs. */ + /* Whether this entry is a function call. */ + bool is_function_call; + + /* For assignments, the name in the lhs. + * For functions, the name of the function. */ char *name; /* Resolved format-specific property identifier. */ unsigned int name_id; - /* Whether the lhs in the assignment is indexed and, in that case, its index. */ + /* For assignments, whether the lhs of an assignment is indexed and, in + * that case, its index. */ bool lhs_has_index; unsigned int lhs_index; - /* Instructions present in the rhs. */ + /* Instructions present in the rhs or the function arguments. */ struct hlsl_block *instrs; - /* For assignments, arguments of the rhs initializer. */ + /* For assignments, arguments of the rhs initializer. + * For function calls, the arguments themselves. 
*/ struct hlsl_src *args; unsigned int args_count; }; @@ -556,12 +602,21 @@ struct hlsl_ir_if struct hlsl_block else_block; }; +enum hlsl_ir_loop_unroll_type +{ + HLSL_IR_LOOP_UNROLL, + HLSL_IR_LOOP_FORCE_UNROLL, + HLSL_IR_LOOP_FORCE_LOOP +}; + struct hlsl_ir_loop { struct hlsl_ir_node node; /* loop condition is stored in the body (as "if (!condition) break;") */ struct hlsl_block body; unsigned int next_index; /* liveness index of the end of the loop */ + unsigned int unroll_limit; + enum hlsl_ir_loop_unroll_type unroll_type; }; struct hlsl_ir_switch_case @@ -583,6 +638,7 @@ struct hlsl_ir_switch enum hlsl_ir_expr_op { HLSL_OP0_VOID, + HLSL_OP0_RASTERIZER_SAMPLE_COUNT, HLSL_OP1_ABS, HLSL_OP1_BIT_NOT, @@ -775,13 +831,7 @@ struct hlsl_ir_constant struct hlsl_ir_node node; struct hlsl_constant_value { - union hlsl_constant_value_component - { - uint32_t u; - int32_t i; - float f; - double d; - } u[4]; + union hlsl_constant_value_component u[4]; } value; /* Constant register of type 'c' where the constant value is stored for SM1. */ struct hlsl_reg reg; @@ -811,6 +861,8 @@ struct hlsl_scope bool loop; /* The scope was created for the switch statement. */ bool _switch; + /* The scope contains annotation variables. 
*/ + bool annotations; }; struct hlsl_profile_info @@ -1249,6 +1301,10 @@ void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); +void hlsl_dump_var_default_values(const struct hlsl_ir_var *var); + +bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, + const struct vkd3d_shader_location *loc); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, @@ -1259,7 +1315,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); void hlsl_cleanup_deref(struct hlsl_deref *deref); + void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); +bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src); void hlsl_cleanup_ir_switch_cases(struct list *cases); void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); @@ -1342,7 +1400,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, const struct vkd3d_shader_location *loc); + struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct 
hlsl_deref *resource, @@ -1361,6 +1419,7 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, bool rasteriser_ordered); +struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, @@ -1432,10 +1491,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); +bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); +bool hlsl_sm1_usage_from_semantic(const char *semantic_name, + uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); + +void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); +int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl 
*entry_func); bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index a5923d8bf8e..55993dac2b4 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -49,7 +49,7 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); RESERVED1 auto|catch|char|class|const_cast|delete|dynamic_cast|enum RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try -RESERVED4 typename|union|unsigned|using|virtual +RESERVED4 typename|union|using|virtual WS [ \t] NEWLINE (\n)|(\r\n) @@ -164,6 +164,7 @@ textureCUBE {return KW_TEXTURECUBE; } TextureCubeArray {return KW_TEXTURECUBEARRAY; } true {return KW_TRUE; } typedef {return KW_TYPEDEF; } +unsigned {return KW_UNSIGNED; } uniform {return KW_UNIFORM; } vector {return KW_VECTOR; } VertexShader {return KW_VERTEXSHADER; } @@ -197,7 +198,9 @@ while {return KW_WHILE; } struct hlsl_ctx *ctx = yyget_extra(yyscanner); yylval->name = hlsl_strdup(ctx, yytext); - if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) + if (hlsl_version_ge(ctx, 5, 1) && !strcmp(yytext, "ConstantBuffer")) + return KW_CONSTANTBUFFER; + else if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) return VAR_IDENTIFIER; else if (hlsl_get_type(ctx->cur_scope, yytext, true, true)) return TYPE_IDENTIFIER; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 9c1bdef926d..7b058a65bc1 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -573,12 +573,91 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' 
attribute."); } +static union hlsl_constant_value_component evaluate_static_expression(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) +{ + union hlsl_constant_value_component ret = {0}; + struct hlsl_ir_constant *constant; + struct hlsl_ir_node *node; + struct hlsl_block expr; + struct hlsl_src src; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + switch (node->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_SWIZZLE: + case HLSL_IR_LOAD: + case HLSL_IR_INDEX: + continue; + case HLSL_IR_STORE: + if (hlsl_ir_store(node)->lhs.var->is_synthetic) + break; + /* fall-through */ + case HLSL_IR_CALL: + case HLSL_IR_IF: + case HLSL_IR_LOOP: + case HLSL_IR_JUMP: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: + case HLSL_IR_STATEBLOCK_CONSTANT: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + break; + } + } + + if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) + return ret; + hlsl_block_add_block(&expr, block); + + if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) + { + hlsl_block_cleanup(&expr); + return ret; + } + + /* Wrap the node into a src to allow the reference to survive the multiple const passes. 
*/ + hlsl_src_from_node(&src, node_from_block(&expr)); + hlsl_run_const_passes(ctx, &expr); + node = src.node; + hlsl_src_remove(&src); + + if (node->type == HLSL_IR_CONSTANT) + { + constant = hlsl_ir_constant(node); + ret = constant->value.u[0]; + } + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Failed to evaluate constant expression."); + } + + hlsl_block_cleanup(&expr); + + return ret; +} + +static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) +{ + union hlsl_constant_value_component res; + + res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + return res.u; +} + static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { + enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; + unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; - unsigned int i; if (attribute_list_has_duplicates(attributes)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); @@ -591,18 +670,29 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct hlsl_attribute *attr = attributes->attrs[i]; if (!strcmp(attr->name, "unroll")) { - if (attr->args_count) + if (attr->args_count > 1) { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); + hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, + "Ignoring 'unroll' attribute with more than 1 argument."); + continue; } - else + + if (attr->args_count == 1) { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); + struct hlsl_block expr; + 
hlsl_block_init(&expr); + if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) + return NULL; + + unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); + hlsl_block_cleanup(&expr); } + + unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; } else if (!strcmp(attr->name, "loop")) { - /* TODO: this attribute will be used to disable unrolling, once it's implememented. */ + unroll_type = HLSL_IR_LOOP_FORCE_LOOP; } else if (!strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) @@ -631,7 +721,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, else list_move_head(&body->instrs, &cond->instrs); - if (!(loop = hlsl_new_loop(ctx, body, loc))) + if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) goto oom; hlsl_block_add_instr(init, loop); @@ -1013,6 +1103,10 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); } } + + if (hlsl_version_ge(ctx, 5, 1) && field->type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(field->type)) + hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); + vkd3d_free(v->arrays.sizes); field->loc = v->loc; field->name = v->name; @@ -1210,12 +1304,42 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl return true; } -static bool parse_reservation_index(const char *string, char *type, uint32_t *index) +static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, + struct hlsl_reg_reservation *reservation) { - if (!sscanf(string + 1, "%u", index)) - return false; + char *endptr; + + reservation->reg_type = ascii_tolower(string[0]); + + /* Prior to SM5.1, fxc simply ignored bracket offsets for 'b' types. 
*/ + if (reservation->reg_type == 'b' && hlsl_version_lt(ctx, 5, 1)) + { + bracket_offset = 0; + } + + if (string[1] == '\0') + { + reservation->reg_index = bracket_offset; + return true; + } + + reservation->reg_index = strtoul(string + 1, &endptr, 10) + bracket_offset; + + if (*endptr) + { + /* fxc for SM >= 4 treats all parse failures for 'b' types as successes, + * setting index to -1. It will later fail while validating slot limits. */ + if (reservation->reg_type == 'b' && hlsl_version_ge(ctx, 4, 0)) + { + reservation->reg_index = -1; + return true; + } + + /* All other types tolerate leftover characters. */ + if (endptr == string + 1) + return false; + } - *type = ascii_tolower(string[0]); return true; } @@ -1286,72 +1410,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * return block; } -static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_constant *constant; - struct hlsl_ir_node *node; - struct hlsl_block expr; - unsigned int ret = 0; - struct hlsl_src src; - - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { - switch (node->type) - { - case HLSL_IR_CONSTANT: - case HLSL_IR_EXPR: - case HLSL_IR_SWIZZLE: - case HLSL_IR_LOAD: - case HLSL_IR_INDEX: - continue; - case HLSL_IR_CALL: - case HLSL_IR_IF: - case HLSL_IR_LOOP: - case HLSL_IR_JUMP: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - case HLSL_IR_SWITCH: - case HLSL_IR_STATEBLOCK_CONSTANT: - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - } - } - - if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) - return 0; - hlsl_block_add_block(&expr, block); - - if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), - hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) - { - hlsl_block_cleanup(&expr); - return 0; - } - - /* Wrap the 
node into a src to allow the reference to survive the multiple const passes. */ - hlsl_src_from_node(&src, node_from_block(&expr)); - hlsl_run_const_passes(ctx, &expr); - node = src.node; - hlsl_src_remove(&src); - - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); - ret = constant->value.u[0].u; - } - else - { - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Failed to evaluate constant expression."); - } - - hlsl_block_cleanup(&expr); - - return ret; -} - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) { /* Scalar vars can be converted to pretty much everything */ @@ -1862,12 +1920,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned return true; } +static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) +{ + /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. + * components are indexed by their sources. i.e. the first component comes from the first + * component of the rhs. */ + unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; + + /* First, we filter the swizzle to remove components that aren't enabled by writemask. */ + for (i = 0; i < 4; ++i) + { + if (*writemask & (1 << i)) + { + unsigned int s = (*swizzle >> (i * 8)) & 0xff; + unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int idx = x + y * 4; + new_swizzle |= s << (bit++ * 8); + if (new_writemask & (1 << idx)) + return false; + new_writemask |= 1 << idx; + } + } + width = bit; + + /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the + * incoming vector. 
*/ + bit = 0; + for (i = 0; i < 16; ++i) + { + for (j = 0; j < width; ++j) + { + unsigned int s = (new_swizzle >> (j * 8)) & 0xff; + unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int idx = x + y * 4; + if (idx == i) + inverted |= j << (bit++ * 2); + } + } + + *swizzle = inverted; + *writemask = new_writemask; + *ret_width = width; + return true; +} + static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; struct hlsl_ir_node *copy; - unsigned int writemask = 0; + unsigned int writemask = 0, width = 0; + bool matrix_writemask = false; if (assign_op == ASSIGN_OP_SUB) { @@ -1885,7 +1988,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo } if (hlsl_is_numeric_type(lhs_type)) + { writemask = (1 << lhs_type->dimx) - 1; + width = lhs_type->dimx; + } if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) return NULL; @@ -1902,12 +2008,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); struct hlsl_ir_node *new_swizzle; uint32_t s = swizzle->swizzle; - unsigned int width; - if (lhs->data_type->class == HLSL_CLASS_MATRIX) - hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); + assert(!matrix_writemask); - if (!invert_swizzle(&s, &writemask, &width)) + if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) + { + if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) + { + hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); + return NULL; + } + if (!invert_swizzle_matrix(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); + return NULL; + } + matrix_writemask = true; + } + else if (!invert_swizzle(&s, 
&writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); return NULL; @@ -1955,7 +2073,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) + if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components."); @@ -1971,12 +2089,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); } + else if (matrix_writemask) + { + struct hlsl_deref deref; + unsigned int i, j, k = 0; + + hlsl_init_deref_from_index_chain(ctx, &deref, lhs); + + for (i = 0; i < lhs->data_type->dimy; ++i) + { + for (j = 0; j < lhs->data_type->dimx; ++j) + { + struct hlsl_ir_node *load; + struct hlsl_block store_block; + const unsigned int idx = i * 4 + j; + const unsigned int component = i * lhs->data_type->dimx + j; + + if (!(writemask & (1 << idx))) + continue; + + if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + + if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + hlsl_block_add_block(block, &store_block); + } + } + + hlsl_cleanup_deref(&deref); + } else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) { struct hlsl_ir_index *row = hlsl_ir_index(lhs); struct hlsl_ir_node *mat = row->val.node; unsigned int i, k = 0; + assert(!matrix_writemask); + for (i = 0; i < mat->data_type->dimx; ++i) { struct hlsl_ir_node *cell, *load, *store, *c; @@ -2067,6 +2223,53 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block 
*block, bool d return true; } +/* For some reason, for matrices, values from default value initializers end up in different + * components than from regular initializers. Default value initializers fill the matrix in + * vertical reading order (left-to-right top-to-bottom) instead of regular reading order + * (top-to-bottom left-to-right), so they have to be adjusted. */ +static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, + struct hlsl_type *type, unsigned int index) +{ + unsigned int element_comp_count, element, x, y, i; + unsigned int base = 0; + + if (ctx->profile->major_version < 4) + return index; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) + return index; + + switch (type->class) + { + case HLSL_CLASS_MATRIX: + x = index / type->dimy; + y = index % type->dimy; + return y * type->dimx + x; + + case HLSL_CLASS_ARRAY: + element_comp_count = hlsl_type_component_count(type->e.array.type); + element = index / element_comp_count; + base = element * element_comp_count; + return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); + + case HLSL_CLASS_STRUCT: + for (i = 0; i < type->e.record.field_count; ++i) + { + struct hlsl_type *field_type = type->e.record.fields[i].type; + + element_comp_count = hlsl_type_component_count(field_type); + if (index - base < element_comp_count) + return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); + base += element_comp_count; + } + break; + + default: + return index; + } + vkd3d_unreachable(); +} + static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) { @@ -2087,12 +2290,29 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - if (!(conv = add_implicit_conversion(ctx, 
instrs, load, dst_comp_type, &src->loc))) - return; + if (dst->default_values) + { + struct hlsl_default_value default_value = {0}; + unsigned int dst_index; - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; - hlsl_block_add_block(instrs, &block); + if (!hlsl_clone_block(ctx, &block, instrs)) + return; + default_value.value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); + + dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); + dst->default_values[dst_index] = default_value; + + hlsl_block_cleanup(&block); + } + else + { + if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) + return; + + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) + return; + hlsl_block_add_block(instrs, &block); + } ++*store_index; } @@ -2171,6 +2391,7 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) struct hlsl_semantic new_semantic; uint32_t modifiers = v->modifiers; bool unbounded_res_array = false; + bool constant_buffer = false; struct hlsl_ir_var *var; struct hlsl_type *type; bool local = true; @@ -2190,6 +2411,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); } + if (type->class == HLSL_CLASS_CONSTANT_BUFFER) + { + type = type->e.resource.format; + constant_buffer = true; + } + if (unbounded_res_array) { if (v->arrays.count == 1) @@ -2246,17 +2473,22 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) } } + if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) + { + /* SM 5.1/6.x descriptor arrays act differently from previous versions. + * Not only are they treated as a single object in reflection, but they + * act as a single component for the purposes of assignment and + * initialization. 
*/ + hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); + } + if (!(var_name = vkd3d_strdup(v->name))) return; - new_semantic = v->semantic; - if (v->semantic.name) + if (!hlsl_clone_semantic(ctx, &new_semantic, &v->semantic)) { - if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) - { - vkd3d_free(var_name); - return; - } + vkd3d_free(var_name); + return; } if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) @@ -2266,7 +2498,16 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) return; } - var->buffer = ctx->cur_buffer; + if (constant_buffer && ctx->cur_scope == ctx->globals) + { + if (!(var_name = vkd3d_strdup(v->name))) + return; + var->buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, var_name, modifiers, &v->reg_reservation, NULL, &v->loc); + } + else + { + var->buffer = ctx->cur_buffer; + } if (var->buffer == ctx->globals_buffer) { @@ -2289,8 +2530,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) if (!(modifiers & HLSL_STORAGE_STATIC)) var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) + && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { check_invalid_object_fields(ctx, var); + } if ((func = hlsl_get_first_func_decl(ctx, var->name))) { @@ -2348,6 +2592,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var { struct parse_variable_def *v, *v_next; struct hlsl_block *initializers; + unsigned int component_count; struct hlsl_ir_var *var; struct hlsl_type *type; @@ -2371,6 +2616,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var } type = var->data_type; + component_count = hlsl_type_component_count(type); var->state_blocks = v->state_blocks; var->state_block_count = v->state_block_count; 
@@ -2379,51 +2625,78 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->state_block_capacity = 0; v->state_blocks = NULL; - if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) + if (var->state_blocks && component_count != var->state_block_count) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u state blocks, but got %u.", - hlsl_type_component_count(type), var->state_block_count); + "Expected %u state blocks, but got %u.", component_count, var->state_block_count); free_parse_variable_def(v); continue; } if (v->initializer.args_count) { - if (v->initializer.braces) + unsigned int store_index = 0; + bool is_default_values_initializer; + unsigned int size, k; + + is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) + || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) + || ctx->cur_scope->annotations; + + if (is_default_values_initializer) { - unsigned int size = initializer_size(&v->initializer); - unsigned int store_index = 0; - unsigned int k; + /* Default values might have been allocated already for another variable of the same name, + in the same scope. 
*/ + if (var->default_values) + { + free_parse_variable_def(v); + continue; + } - if (hlsl_type_component_count(type) != size) + if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u components in initializer, but got %u.", - hlsl_type_component_count(type), size); free_parse_variable_def(v); continue; } + } - for (k = 0; k < v->initializer.args_count; ++k) + if (!v->initializer.braces) + { + if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) { - initialize_var_components(ctx, v->initializer.instrs, var, - &store_index, v->initializer.args[k]); + free_parse_variable_def(v); + continue; } + + v->initializer.args[0] = node_from_block(v->initializer.instrs); } - else + + size = initializer_size(&v->initializer); + if (component_count != size) { - struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", component_count, size); + free_parse_variable_def(v); + continue; + } - assert(v->initializer.args_count == 1); - hlsl_block_add_instr(v->initializer.instrs, &load->node); - add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); + for (k = 0; k < v->initializer.args_count; ++k) + { + initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); } - if (var->storage_modifiers & HLSL_STORAGE_STATIC) + if (is_default_values_initializer) + { + hlsl_dump_var_default_values(var); + } + else if (var->storage_modifiers & HLSL_STORAGE_STATIC) + { hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); + } else + { hlsl_block_add_block(initializers, v->initializer.instrs); + } } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -3353,6 +3626,34 @@ static bool 
intrinsic_exp2(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); } +static bool intrinsic_faceforward(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s faceforward(%s n, %s i, %s ng)\n" + "{\n" + " return dot(i, ng) < 0 ? n : -n;\n" + "}\n"; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); + + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, type->name, type->name, type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "faceforward", body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_floor(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4032,6 +4333,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) { + unsigned int sampler_dim = hlsl_sampler_dim_count(dim); struct hlsl_resource_load_params load_params = { 0 }; const struct hlsl_type *sampler_type; struct hlsl_ir_node *coords, *sample; @@ -4043,11 +4345,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; } - if (params->args_count == 4) - { - hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); - } - sampler_type = params->args[0]->data_type; if (sampler_type->class != HLSL_CLASS_SAMPLER || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) @@ -4061,18 +4358,22 @@ static bool intrinsic_tex(struct hlsl_ctx 
*ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); } - if (!strcmp(name, "tex2Dlod")) + if (!strcmp(name, "tex2Dbias") + || !strcmp(name, "tex2Dlod")) { struct hlsl_ir_node *lod, *c; - load_params.type = HLSL_RESOURCE_SAMPLE_LOD; + if (!strcmp(name, "tex2Dlod")) + load_params.type = HLSL_RESOURCE_SAMPLE_LOD; + else + load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; - if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) + if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) return false; hlsl_block_add_instr(params->instrs, c); - if (!(coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, - hlsl_sampler_dim_count(dim)), loc))) + if (!(coords = add_implicit_conversion(ctx, params->instrs, c, + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) { return false; } @@ -4099,14 +4400,13 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (hlsl_version_ge(ctx, 4, 0)) { - unsigned int count = hlsl_sampler_dim_count(dim); struct hlsl_ir_node *divisor; - if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), count, coords, loc))) + if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) return false; hlsl_block_add_instr(params->instrs, divisor); - if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), count, coords, loc))) + if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) return false; hlsl_block_add_instr(params->instrs, coords); @@ -4120,12 +4420,34 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; } } + else if (params->args_count == 4) /* Gradient sampling. 
*/ + { + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } + + if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } + + if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } + + load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; + } else { load_params.type = HLSL_RESOURCE_SAMPLE; if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) { return false; } @@ -4181,12 +4503,30 @@ static bool intrinsic_tex1D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); } +static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); +} + static bool intrinsic_tex2D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); } +static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); +} + +static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex2Dgrad", HLSL_SAMPLER_DIM_2D); +} + static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, const struct parse_initializer *params, const 
struct vkd3d_shader_location *loc) { @@ -4205,6 +4545,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); } +static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); +} + static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4217,6 +4563,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); } +static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); +} + static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4336,6 +4688,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, return true; } +static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *expr; + + if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, + operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) + return false; + hlsl_block_add_instr(params->instrs, expr); + + return true; +} + static const struct intrinsic_function { const char *name; @@ -4348,6 +4714,7 @@ intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. 
*/ {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, + {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, {"abs", 1, true, intrinsic_abs}, {"acos", 1, true, intrinsic_acos}, {"all", 1, true, intrinsic_all}, @@ -4375,6 +4742,7 @@ intrinsic_functions[] = {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, + {"faceforward", 3, true, intrinsic_faceforward}, {"floor", 1, true, intrinsic_floor}, {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, @@ -4406,12 +4774,17 @@ intrinsic_functions[] = {"tan", 1, true, intrinsic_tan}, {"tanh", 1, true, intrinsic_tanh}, {"tex1D", -1, false, intrinsic_tex1D}, + {"tex1Dgrad", 4, false, intrinsic_tex1Dgrad}, {"tex2D", -1, false, intrinsic_tex2D}, + {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, + {"tex2Dgrad", 4, false, intrinsic_tex2Dgrad}, {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, + {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, {"texCUBE", -1, false, intrinsic_texCUBE}, + {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, @@ -5481,6 +5854,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_BREAK %token KW_BUFFER %token KW_CASE +%token KW_CONSTANTBUFFER %token KW_CBUFFER %token KW_CENTROID %token KW_COLUMN_MAJOR @@ -5566,6 +5940,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_TEXTURECUBEARRAY %token KW_TRUE %token KW_TYPEDEF +%token KW_UNSIGNED %token KW_UNIFORM %token KW_VECTOR %token KW_VERTEXSHADER @@ -5670,6 +6045,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %type if_body +%type array + %type var_modifiers %type any_identifier @@ 
-5717,8 +6094,7 @@ hlsl_prog: | hlsl_prog buffer_declaration buffer_body | hlsl_prog declaration_statement { - if (!list_empty(&$2->instrs)) - hlsl_fixme(ctx, &@2, "Uniform initializer."); + hlsl_block_add_block(&ctx->static_initializers, $2); destroy_block($2); } | hlsl_prog preproc_directive @@ -5742,19 +6118,31 @@ pass: annotations_list: variables_def_typed ';' + { + struct hlsl_block *block; + + block = initialize_vars(ctx, $1); + destroy_block(block); + } | annotations_list variables_def_typed ';' + { + struct hlsl_block *block; + + block = initialize_vars(ctx, $2); + destroy_block(block); + } annotations_opt: %empty { $$ = NULL; } - | '<' scope_start '>' + | '<' annotations_scope_start '>' { hlsl_pop_scope(ctx); $$ = NULL; } - | '<' scope_start annotations_list '>' + | '<' annotations_scope_start annotations_list '>' { struct hlsl_scope *scope = ctx->cur_scope; @@ -6282,6 +6670,13 @@ switch_scope_start: ctx->cur_scope->_switch = true; } +annotations_scope_start: + %empty + { + hlsl_push_scope(ctx); + ctx->cur_scope->annotations = true; + } + var_identifier: VAR_IDENTIFIER | NEW_IDENTIFIER @@ -6315,6 +6710,9 @@ semantic: { char *p; + if (!($$.raw_name = hlsl_strdup(ctx, $2))) + YYABORT; + for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) ; $$.name = $2; @@ -6330,22 +6728,34 @@ register_reservation: ':' KW_REGISTER '(' any_identifier ')' { memset(&$$, 0, sizeof($$)); - if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) + if (!parse_reservation_index(ctx, $4, 0, &$$)) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + + vkd3d_free($4); + } + | ':' KW_REGISTER '(' any_identifier '[' expr ']' ')' + { + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) + { hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Invalid register reservation '%s'.", $4); + } vkd3d_free($4); + vkd3d_free($6); } | ':' 
KW_REGISTER '(' any_identifier ',' any_identifier ')' { memset(&$$, 0, sizeof($$)); - if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) + if (parse_reservation_index(ctx, $6, 0, &$$)) { hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); } else if (parse_reservation_space($6, &$$.reg_space)) { - if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) + if (!parse_reservation_index(ctx, $4, 0, &$$)) hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Invalid register reservation '%s'.", $4); } @@ -6358,12 +6768,45 @@ register_reservation: vkd3d_free($4); vkd3d_free($6); } + | ':' KW_REGISTER '(' any_identifier '[' expr ']' ',' any_identifier ')' + { + memset(&$$, 0, sizeof($$)); + + if (!parse_reservation_space($9, &$$.reg_space)) + hlsl_error(ctx, &@9, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register space reservation '%s'.", $9); + + if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) + { + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + } + + vkd3d_free($4); + vkd3d_free($6); + vkd3d_free($9); + } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ')' + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) + { + hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $6); + } + + vkd3d_free($4); + vkd3d_free($6); + vkd3d_free($8); + } | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' { hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); memset(&$$, 0, sizeof($$)); - if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) + if (!parse_reservation_index(ctx, $6, 0, &$$)) hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Invalid register 
reservation '%s'.", $6); @@ -6375,6 +6818,26 @@ register_reservation: vkd3d_free($6); vkd3d_free($8); } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ',' any_identifier ')' + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) + { + hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $6); + } + + if (!parse_reservation_space($11, &$$.reg_space)) + hlsl_error(ctx, &@11, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register space reservation '%s'.", $11); + + vkd3d_free($4); + vkd3d_free($6); + vkd3d_free($8); + vkd3d_free($11); + } packoffset_reservation: ':' KW_PACKOFFSET '(' any_identifier ')' @@ -6449,8 +6912,13 @@ parameter: } type = hlsl_new_array_type(ctx, type, $4.sizes[i]); } + vkd3d_free($4.sizes); + $$.type = type; + if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) + hlsl_fixme(ctx, &@2, "Shader model 5.1+ resource array."); + $$.name = $3; $$.semantic = $5.semantic; $$.reg_reservation = $5.reg_reservation; @@ -6713,6 +7181,26 @@ type_no_void: } vkd3d_free($1); } + | KW_UNSIGNED TYPE_IDENTIFIER + { + struct hlsl_type *type = hlsl_get_type(ctx->cur_scope, $2, true, true); + + if (hlsl_is_numeric_type(type) && type->e.numeric.type == HLSL_TYPE_INT) + { + if (!(type = hlsl_type_clone(ctx, type, 0, 0))) + YYABORT; + vkd3d_free((void *)type->name); + type->name = NULL; + type->e.numeric.type = HLSL_TYPE_UINT; + } + else + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "The 'unsigned' keyword can't be used with type %s.", $2); + } + + $$ = type; + } | KW_STRUCT TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); @@ -6724,6 +7212,10 @@ type_no_void: { $$ = hlsl_get_type(ctx->cur_scope, "RenderTargetView", true, true); } + | KW_DEPTHSTENCILSTATE + { + $$ = 
hlsl_get_type(ctx->cur_scope, "DepthStencilState", true, true); + } | KW_DEPTHSTENCILVIEW { $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); @@ -6736,6 +7228,17 @@ type_no_void: { $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); } + | KW_CONSTANTBUFFER '<' type '>' + { + if ($3->class != HLSL_CLASS_STRUCT) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "ConstantBuffer<...> requires user-defined structure type."); + $$ = hlsl_new_cb_type(ctx, $3); + } + | KW_RASTERIZERSTATE + { + $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); + } type: type_no_void @@ -6932,6 +7435,34 @@ state_block: hlsl_src_from_node(&entry->args[i], $5.args[i]); vkd3d_free($5.args); + $$ = $1; + state_block_add_entry($$, entry); + } + | state_block any_identifier '(' func_arguments ')' ';' + { + struct hlsl_state_block_entry *entry; + unsigned int i; + + if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) + YYABORT; + + entry->is_function_call = true; + + entry->name = $2; + entry->lhs_has_index = false; + entry->lhs_index = 0; + + entry->instrs = $4.instrs; + + entry->args_count = $4.args_count; + if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) + YYABORT; + for (i = 0; i < entry->args_count; ++i) + hlsl_src_from_node(&entry->args[i], $4.args[i]); + vkd3d_free($4.args); + + hlsl_validate_state_block_entry(ctx, entry, &@4); + $$ = $1; state_block_add_entry($$, entry); } @@ -7020,52 +7551,43 @@ variable_def_typed: $$->modifiers_loc = @1; } -arrays: - %empty +array: + '[' ']' { - $$.sizes = NULL; - $$.count = 0; + $$ = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; } - | '[' expr ']' arrays + | '[' expr ']' { - uint32_t *new_array; - unsigned int size; - - size = evaluate_static_expression_as_uint(ctx, $2, &@2); - - destroy_block($2); + $$ = evaluate_static_expression_as_uint(ctx, $2, &@2); - $$ = $4; - - if (!size) + if (!$$) { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, "Array size is not a 
positive integer constant."); - vkd3d_free($$.sizes); YYABORT; } - if (size > 65536) + if ($$ > 65536) { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, - "Array size %u is not between 1 and 65536.", size); - vkd3d_free($$.sizes); + "Array size %u is not between 1 and 65536.", $$); YYABORT; } - if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) - { - vkd3d_free($$.sizes); - YYABORT; - } - $$.sizes = new_array; - $$.sizes[$$.count++] = size; + destroy_block($2); } - | '[' ']' arrays + +arrays: + %empty + { + $$.sizes = NULL; + $$.count = 0; + } + | array arrays { uint32_t *new_array; - $$ = $3; + $$ = $2; if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) { @@ -7074,7 +7596,7 @@ arrays: } $$.sizes = new_array; - $$.sizes[$$.count++] = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; + $$.sizes[$$.count++] = $1; } var_modifiers: @@ -7156,6 +7678,8 @@ var_modifiers: } | var_identifier var_modifiers { + $$ = $2; + if (!strcmp($1, "precise")) $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); else if (!strcmp($1, "single")) diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index bdb72a1fab9..7e4f168675e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -218,6 +218,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, uniform->is_uniform = 1; uniform->is_param = temp->is_param; uniform->buffer = temp->buffer; + if (temp->default_values) + { + /* Transfer default values from the temp to the uniform. 
*/ + assert(!uniform->default_values); + assert(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); + uniform->default_values = temp->default_values; + temp->default_values = NULL; + } if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) return; @@ -312,7 +320,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir } } - if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) + if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) { vkd3d_free(new_name); return NULL; @@ -1623,9 +1631,11 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, switch (type->class) { + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_TEXTURE: case HLSL_CLASS_UAV: @@ -1635,6 +1645,7 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, case HLSL_CLASS_MATRIX: case HLSL_CLASS_ARRAY: case HLSL_CLASS_STRUCT: + case HLSL_CLASS_CONSTANT_BUFFER: /* FIXME: Actually we shouldn't even get here, but we don't split * matrices yet. */ return false; @@ -1970,6 +1981,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc return progress; } +enum validation_result +{ + DEREF_VALIDATION_OK, + DEREF_VALIDATION_OUT_OF_BOUNDS, + DEREF_VALIDATION_NOT_CONSTANT, +}; + +static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, + const struct hlsl_deref *deref) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + assert(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return DEREF_VALIDATION_NOT_CONSTANT; + + /* We should always have generated a cast to UINT. 
*/ + assert(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + + switch (type->class) + { + case HLSL_CLASS_VECTOR: + if (idx >= type->dimx) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Vector index is out of bounds. %u/%u", idx, type->dimx); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_MATRIX: + if (idx >= hlsl_type_major_size(type)) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_STRUCT: + break; + + default: + vkd3d_unreachable(); + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + return DEREF_VALIDATION_OK; +} + static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, const char *usage) { @@ -1987,60 +2068,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct } } -static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - unsigned int start, count; - - if (instr->type == HLSL_IR_RESOURCE_LOAD) + switch (instr->type) { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - - if (!load->resource.var->is_uniform) + case HLSL_IR_RESOURCE_LOAD: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource must have a 
single uniform source."); + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + + if (!load->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource must have a single uniform source."); + } + else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource from \"%s\" must be determinable at compile time.", + load->resource.var->name); + note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); + } + + if (load->sampler.var) + { + if (!load->sampler.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler must have a single uniform source."); + } + else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler from \"%s\" must be determinable at compile time.", + load->sampler.var->name); + note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); + } + } + break; } - else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) + case HLSL_IR_RESOURCE_STORE: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource from \"%s\" must be determinable at compile time.", - load->resource.var->name); - note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); - } + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - if (load->sampler.var) - { - if (!load->sampler.var->is_uniform) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler must have a single uniform source."); + "Accessed resource must have 
a single uniform source."); } - else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) + else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler from \"%s\" must be determinable at compile time.", - load->sampler.var->name); - note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); + "Accessed resource from \"%s\" must be determinable at compile time.", + store->resource.var->name); + note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); } + break; } - } - else if (instr->type == HLSL_IR_RESOURCE_STORE) - { - struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - - if (!store->resource.var->is_uniform) + case HLSL_IR_LOAD: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); + struct hlsl_ir_load *load = hlsl_ir_load(instr); + validate_component_index_range_from_deref(ctx, &load->src); + break; } - else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) + case HLSL_IR_STORE: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource from \"%s\" must be determinable at compile time.", - store->resource.var->name); - note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); + struct hlsl_ir_store *store = hlsl_ir_store(instr); + validate_component_index_range_from_deref(ctx, &store->lhs); + break; } + default: + break; } return false; @@ -2554,11 +2652,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in case HLSL_RESOURCE_RESINFO: case HLSL_RESOURCE_SAMPLE_CMP: case HLSL_RESOURCE_SAMPLE_CMP_LZ: - case HLSL_RESOURCE_SAMPLE_GRAD: case HLSL_RESOURCE_SAMPLE_INFO: return false; case HLSL_RESOURCE_SAMPLE: + case 
HLSL_RESOURCE_SAMPLE_GRAD: case HLSL_RESOURCE_SAMPLE_LOD: case HLSL_RESOURCE_SAMPLE_LOD_BIAS: case HLSL_RESOURCE_SAMPLE_PROJ: @@ -3815,15 +3913,16 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { + const struct hlsl_reg_reservation *reservation = &var->reg_reservation; unsigned int r; - if (var->reg_reservation.reg_type) + if (reservation->reg_type) { for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) { if (var->regs[r].allocation_size > 0) { - if (var->reg_reservation.reg_type != get_regset_name(r)) + if (reservation->reg_type != get_regset_name(r)) { struct vkd3d_string_buffer *type_string; @@ -3839,10 +3938,8 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) else { var->regs[r].allocated = true; - var->regs[r].id = var->reg_reservation.reg_index; - TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, - var->reg_reservation.reg_index, var->reg_reservation.reg_type, - var->reg_reservation.reg_index + var->regs[r].allocation_size); + var->regs[r].space = reservation->reg_space; + var->regs[r].index = reservation->reg_index; } } } @@ -4181,8 +4278,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, { unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ + if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); + return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); else return allocate_range(ctx, allocator, first_write, last_read, reg_size); } @@ -4589,6 +4688,7 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { + struct register_allocator allocator_used = {0}; struct register_allocator allocator = {0}; struct hlsl_ir_var *var; @@ -4597,6 +4697,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC]; if (!var->is_uniform || reg_size == 0) continue; @@ -4609,12 +4710,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi assert(reg_size % 4 == 0); for (i = 0; i < reg_size / 4; ++i) { - if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) + if (i < bind_count) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Overlapping register() reservations on 'c%u'.", reg_idx + i); + if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } + record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); } - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); } @@ -4627,6 +4731,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi } } + vkd3d_free(allocator_used.allocations); + 
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; @@ -4697,7 +4803,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if (ctx->profile->major_version < 4) { - D3DSHADER_PARAM_REGISTER_TYPE sm1_type; + struct vkd3d_shader_version version; D3DDECLUSAGE usage; uint32_t usage_idx; @@ -4705,8 +4811,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) return; - builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, &reg); - if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + builtin = hlsl_sm1_register_from_semantic(&version, + var->semantic.name, var->semantic.index, output, &type, &reg); + if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -4715,7 +4825,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if ((!output && !var->last_read) || (output && !var->first_write)) return; - type = (enum vkd3d_shader_register_type)sm1_type; } else { @@ -4762,13 +4871,14 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) } } -static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) +static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) { const struct hlsl_buffer *buffer; LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) { - if (buffer->used_size && buffer->reservation.reg_type == 'b' && 
buffer->reservation.reg_index == index) + if (buffer->reservation.reg_type == 'b' + && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) return buffer; } return NULL; @@ -4783,6 +4893,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va if (register_reservation) { var->buffer_offset = 4 * var->reg_reservation.reg_index; + var->has_explicit_bind_point = 1; } else { @@ -4815,6 +4926,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va } } var->buffer_offset = var->reg_reservation.offset_index; + var->has_explicit_bind_point = 1; } else { @@ -4913,11 +5025,19 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) } } +static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx) +{ + if (hlsl_version_ge(ctx, 5, 1)) + return UINT_MAX; + + return 13; +} + static void allocate_buffers(struct hlsl_ctx *ctx) { struct hlsl_buffer *buffer; + uint32_t index = 0, id = 0; struct hlsl_ir_var *var; - uint32_t index = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -4938,32 +5058,59 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (buffer->type == HLSL_BUFFER_CONSTANT) { - if (buffer->reservation.reg_type == 'b') + const struct hlsl_reg_reservation *reservation = &buffer->reservation; + + if (reservation->reg_type == 'b') { - const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); + const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, + reservation->reg_space, reservation->reg_index); + unsigned int max_index = get_max_cbuffer_reg_index(ctx); + + if (buffer->reservation.reg_index > max_index) + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Buffer reservation cb%u exceeds target's maximum (cb%u).", + buffer->reservation.reg_index, max_index); if (reserved_buffer && reserved_buffer != buffer) { hlsl_error(ctx, &buffer->loc, 
VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); + "Multiple buffers bound to space %u, index %u.", + reservation->reg_space, reservation->reg_index); hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, - "Buffer %s is already bound to cb%u.", reserved_buffer->name, buffer->reservation.reg_index); + "Buffer %s is already bound to space %u, index %u.", + reserved_buffer->name, reservation->reg_space, reservation->reg_index); } - buffer->reg.id = buffer->reservation.reg_index; + buffer->reg.space = reservation->reg_space; + buffer->reg.index = reservation->reg_index; + if (hlsl_version_ge(ctx, 5, 1)) + buffer->reg.id = id++; + else + buffer->reg.id = buffer->reg.index; buffer->reg.allocation_size = 1; buffer->reg.allocated = true; - TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); + TRACE("Allocated reserved %s to space %u, index %u, id %u.\n", + buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id); } - else if (!buffer->reservation.reg_type) + else if (!reservation->reg_type) { - while (get_reserved_buffer(ctx, index)) + unsigned int max_index = get_max_cbuffer_reg_index(ctx); + while (get_reserved_buffer(ctx, 0, index)) ++index; - buffer->reg.id = index; + if (index > max_index) + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Too many buffers allocated, target's maximum is %u.", max_index); + + buffer->reg.space = 0; + buffer->reg.index = index; + if (hlsl_version_ge(ctx, 5, 1)) + buffer->reg.id = id++; + else + buffer->reg.id = buffer->reg.index; buffer->reg.allocation_size = 1; buffer->reg.allocated = true; - TRACE("Allocated %s to cb%u.\n", buffer->name, index); + TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id); ++index; } else @@ -4980,7 +5127,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) } static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx 
*ctx, enum hlsl_regset regset, - uint32_t index, bool allocated_only) + uint32_t space, uint32_t index, bool allocated_only) { const struct hlsl_ir_var *var; unsigned int start, count; @@ -4995,12 +5142,18 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum start = var->reg_reservation.reg_index; count = var->data_type->reg_size[regset]; + if (var->reg_reservation.reg_space != space) + continue; + if (!var->regs[regset].allocated && allocated_only) continue; } else if (var->regs[regset].allocated) { - start = var->regs[regset].id; + if (var->regs[regset].space != space) + continue; + + start = var->regs[regset].index; count = var->regs[regset].allocation_size; } else @@ -5017,8 +5170,8 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) { char regset_name = get_regset_name(regset); + uint32_t min_index = 0, id = 0; struct hlsl_ir_var *var; - uint32_t min_index = 0; if (regset == HLSL_REGSET_UAVS) { @@ -5041,35 +5194,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) if (var->regs[regset].allocated) { const struct hlsl_ir_var *reserved_object, *last_reported = NULL; - unsigned int index, i; + unsigned int i; - if (var->regs[regset].id < min_index) + if (var->regs[regset].index < min_index) { assert(regset == HLSL_REGSET_UAVS); hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", - var->regs[regset].id, min_index - 1); + var->regs[regset].index, min_index - 1); continue; } for (i = 0; i < count; ++i) { - index = var->regs[regset].id + i; + unsigned int space = var->regs[regset].space; + unsigned int index = var->regs[regset].index + i; /* get_allocated_object() may return "var" itself, but we * actually want that, otherwise we'll end up reporting the * same conflict between the same two variables 
twice. */ - reserved_object = get_allocated_object(ctx, regset, index, true); + reserved_object = get_allocated_object(ctx, regset, space, index, true); if (reserved_object && reserved_object != var && reserved_object != last_reported) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple variables bound to %c%u.", regset_name, index); + "Multiple variables bound to space %u, %c%u.", space, regset_name, index); hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Variable '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); + "Variable '%s' is already bound to space %u, %c%u.", + reserved_object->name, space, regset_name, index); last_reported = reserved_object; } } + + if (hlsl_version_ge(ctx, 5, 1)) + var->regs[regset].id = id++; + else + var->regs[regset].id = var->regs[regset].index; + TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n", + var->name, var->regs[regset].space, regset_name, var->regs[regset].index, + regset_name, var->regs[regset].index + count, var->regs[regset].id); } else { @@ -5078,7 +5240,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) while (available < count) { - if (get_allocated_object(ctx, regset, index, false)) + if (get_allocated_object(ctx, regset, 0, index, false)) available = 0; else ++available; @@ -5086,10 +5248,15 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } index -= count; - var->regs[regset].id = index; + var->regs[regset].space = 0; + var->regs[regset].index = index; + if (hlsl_version_ge(ctx, 5, 1)) + var->regs[regset].id = id++; + else + var->regs[regset].id = var->regs[regset].index; var->regs[regset].allocated = true; - TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, - index + count); + TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name, + regset_name, index, regset_name, index + 
count, var->regs[regset].id); ++index; } } @@ -5123,21 +5290,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Vector index is out of bounds. %u/%u", idx, type->dimx); return false; - } *start += idx; break; case HLSL_CLASS_MATRIX: if (idx >= hlsl_type_major_size(type)) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); return false; - } if (hlsl_type_is_row_major(type)) *start += idx * type->dimx; else @@ -5146,11 +5305,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl case HLSL_CLASS_ARRAY: if (idx >= type->e.array.elements_count) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); return false; - } *start += idx * hlsl_type_component_count(type->e.array.type); break; @@ -5295,6 +5450,7 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere assert(deref->data_type); assert(hlsl_is_numeric_type(deref->data_type)); + ret.index += offset / 4; ret.id += offset / 4; ret.writemask = 0xf & (0xf << (offset % 4)); @@ -5446,6 +5602,330 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) } while (progress); } +static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, + struct vsir_program *program, bool output, struct hlsl_ir_var *var) +{ + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + enum vkd3d_shader_register_type type; + struct shader_signature *signature; + struct signature_element *element; + unsigned int register_index, mask; + + if ((!output && !var->last_read) || (output && !var->first_write)) + return; + + if (output) + signature = &program->output_signature; 
+ else + signature = &program->input_signature; + + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, + signature->element_count + 1, sizeof(*signature->elements))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + element = &signature->elements[signature->element_count++]; + + if (!hlsl_sm1_register_from_semantic(&program->shader_version, + var->semantic.name, var->semantic.index, output, &type, &register_index)) + { + unsigned int usage, usage_idx; + bool ret; + + register_index = var->regs[HLSL_REGSET_NUMERIC].id; + + ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); + assert(ret); + /* With the exception of vertex POSITION output, none of these are + * system values. Pixel POSITION input is not equivalent to + * SV_Position; the closer equivalent is VPOS, which is not declared + * as a semantic. */ + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX + && output && usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + } + mask = (1 << var->data_type->dimx) - 1; + + memset(element, 0, sizeof(*element)); + if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) + { + --signature->element_count; + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + element->semantic_index = var->semantic.index; + element->sysval_semantic = sysval; + element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; + element->mask = mask; + element->used_mask = mask; + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + element->interpolation_mode = VKD3DSIM_LINEAR; +} + +static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_input_semantic) + 
sm1_generate_vsir_signature_entry(ctx, program, false, var); + if (var->is_output_semantic) + sm1_generate_vsir_signature_entry(ctx, program, true, var); + } +} + +/* OBJECTIVE: Translate all the information from ctx and entry_func to the + * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() + * without relying on ctx and entry_func. */ +static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) +{ + struct vkd3d_shader_version version = {0}; + struct vkd3d_bytecode_buffer buffer = {0}; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + if (!vsir_program_init(program, &version, 0)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + write_sm1_uniforms(ctx, &buffer); + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; + return; + } + ctab->code = buffer.data; + ctab->size = buffer.size; + + sm1_generate_vsir_signature(ctx, program); +} + +static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, + struct hlsl_block **found_block) +{ + struct hlsl_ir_node *node; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + if (node == stop_point) + return NULL; + + if (node->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(node); + struct hlsl_ir_jump *jump = NULL; + + if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) + return jump; + if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) + return jump; + } + else if (node->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + + if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) + { + *found_block = block; + return jump; + } + } + } + + return NULL; +} + 
+static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) +{ + /* Always use the explicit limit if it has been passed. */ + if (loop->unroll_limit) + return loop->unroll_limit; + + /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */ + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + return 1024; + + /* SM4 limits implicit unrolling to 254 iterations. */ + if (hlsl_version_ge(ctx, 4, 0)) + return 254; + + /* SM<3 implicitly unrolls up to 1024 iterations. */ + return 1024; +} + +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) +{ + unsigned int max_iterations, i; + + max_iterations = loop_unrolling_get_max_iterations(ctx, loop); + + for (i = 0; i < max_iterations; ++i) + { + struct hlsl_block tmp_dst, *jump_block; + struct hlsl_ir_jump *jump = NULL; + + if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) + return false; + list_move_before(&loop->node.entry, &tmp_dst.instrs); + hlsl_block_cleanup(&tmp_dst); + + hlsl_run_const_passes(ctx, block); + + if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) + { + enum hlsl_ir_jump_type type = jump->type; + + if (jump_block != loop_parent) + { + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, + "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); + return false; + } + + list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); + hlsl_block_cleanup(&tmp_dst); + + if (type == HLSL_IR_JUMP_BREAK) + break; + } + } + + /* Native will not emit an error if max_iterations has been reached with an + * explicit limit. 
It also will not insert a loop if there are iterations left + * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ + if (!loop->unroll_limit && i == max_iterations) + { + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, + "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); + return false; + } + + list_remove(&loop->node.entry); + hlsl_free_instr(&loop->node); + + return true; +} + +/* + * loop_unrolling_find_unrollable_loop() is not the normal way to do things; + * normal passes simply iterate over the whole block and apply a transformation + * to every relevant instruction. However, loop unrolling can fail, and we want + * to leave the loop in its previous state in that case. That isn't a problem by + * itself, except that loop unrolling needs copy-prop in order to work properly, + * and copy-prop state at the time of the loop depends on the rest of the program + * up to that point. This means we need to clone the whole program, and at that + * point we have to search it again anyway to find the clone of the loop we were + * going to unroll. + * + * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop + * up until the loop instruction, clone just that loop, then use copyprop again + * with the saved state after unrolling. However, copyprop currently isn't built + * for that yet [notably, it still relies on indices]. Note also this still doesn't + * really let us use transform_ir() anyway [since we don't have a good way to say + * "copyprop from the beginning of the program up to the instruction we're + * currently processing" from the callback]; we'd have to use a dedicated + * recursive function instead. 
*/ +static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_block **containing_block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *nested_loop; + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) + return nested_loop; + + if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + { + *containing_block = block; + return loop; + } + + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_loop *loop; + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) + return loop; + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) + return loop; + + break; + } + case HLSL_IR_SWITCH: + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + struct hlsl_ir_loop *loop; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) + return loop; + } + + break; + } + default: + break; + } + } + + return NULL; +} + +static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + while (true) + { + struct hlsl_block clone, *containing_block; + struct hlsl_ir_loop *loop, *cloned_loop; + + if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) + return; + + if (!hlsl_clone_block(ctx, &clone, block)) + return; + + cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); + assert(cloned_loop); + + if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) + { + hlsl_block_cleanup(&clone); + 
loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + continue; + } + + hlsl_block_cleanup(block); + hlsl_block_init(block); + hlsl_block_add_block(block, &clone); + } +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -5532,6 +6012,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); } + transform_unroll_loops(ctx, body); hlsl_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); @@ -5541,7 +6022,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); - hlsl_transform_ir(ctx, validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, validate_dereferences, body, NULL); hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); if (profile->major_version >= 4) hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); @@ -5628,7 +6109,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry switch (target_type) { case VKD3D_SHADER_TARGET_D3D_BYTECODE: - return hlsl_sm1_write(ctx, entry_func, out); + { + uint32_t config_flags = vkd3d_shader_init_config_flags(); + struct vkd3d_shader_code ctab = {0}; + struct vsir_program program; + int result; + + sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); + if (ctx->result) + { + vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&ctab); + return ctx->result; + } + + result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); + vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&ctab); + return result; + } case VKD3D_SHADER_TARGET_DXBC_TPF: return hlsl_sm4_write(ctx, entry_func, out); diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c 
index b3b745fc1b2..e5432cb35ce 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -46,9 +46,9 @@ static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) - || handler_idx == VKD3DSIH_HS_DECLS; + enum vkd3d_shader_opcode opcode = instruction->opcode; + return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) + || opcode == VKD3DSIH_HS_DECLS; } static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) @@ -60,9 +60,9 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i static bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) + enum vkd3d_shader_opcode opcode, unsigned int dst_count, unsigned int src_count) { - vsir_instruction_init(ins, location, handler_idx); + vsir_instruction_init(ins, location, opcode); ins->dst_count = dst_count; ins->src_count = src_count; @@ -287,7 +287,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro mul_ins = &instructions->elements[pos]; add_ins = &instructions->elements[pos + 1]; - mul_ins->handler_idx = VKD3DSIH_MUL; + mul_ins->opcode = VKD3DSIH_MUL; mul_ins->src_count = 2; if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) @@ -322,7 +322,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr { struct vkd3d_shader_instruction *ins = &instructions->elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_IFC: if ((ret = 
vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) @@ -492,26 +492,26 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal struct shader_phase_location *loc; bool b; - if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + if (ins->opcode == VKD3DSIH_HS_FORK_PHASE || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) { b = flattener_is_in_fork_or_join_phase(normaliser); /* Reset the phase info. */ normaliser->phase_body_idx = ~0u; - normaliser->phase = ins->handler_idx; + normaliser->phase = ins->opcode; normaliser->instance_count = 1; /* Leave the first occurrence and delete the rest. */ if (b) vkd3d_shader_instruction_make_nop(ins); return; } - else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT - || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + else if (ins->opcode == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT + || ins->opcode == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) { normaliser->instance_count = ins->declaration.count + !ins->declaration.count; vkd3d_shader_instruction_make_nop(ins); return; } - else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( + else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( &ins->declaration.dst.reg)) { vkd3d_shader_instruction_make_nop(ins); @@ -524,7 +524,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal if (normaliser->phase_body_idx == ~0u) normaliser->phase_body_idx = index; - if (ins->handler_idx == VKD3DSIH_RET) + if (ins->opcode == VKD3DSIH_RET) { normaliser->last_ret_location = ins->location; vkd3d_shader_instruction_make_nop(ins); @@ -679,11 +679,11 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 } void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx) + enum 
vkd3d_shader_opcode opcode) { memset(ins, 0, sizeof(*ins)); ins->location = *location; - ins->handler_idx = handler_idx; + ins->opcode = opcode; } static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, @@ -865,12 +865,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i { ins = &instructions->elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - normaliser.phase = ins->handler_idx; + normaliser.phase = ins->opcode; break; default: if (vsir_instruction_is_dcl(ins)) @@ -888,7 +888,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i { ins = &instructions->elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: input_control_point_count = ins->declaration.count; @@ -1526,7 +1526,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi struct vkd3d_shader_register *reg; unsigned int i; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL_INPUT: if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) @@ -1560,7 +1560,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - normaliser->phase = ins->handler_idx; + normaliser->phase = ins->opcode; memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); @@ -1594,7 +1594,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program { ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: 
normaliser.output_control_point_count = ins->declaration.count; @@ -1608,7 +1608,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program /* fall through */ case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - normaliser.phase = ins->handler_idx; + normaliser.phase = ins->opcode; break; default: break; @@ -1740,7 +1740,7 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) + if (ins->opcode == VKD3DSIH_DEF || ins->opcode == VKD3DSIH_DEFI || ins->opcode == VKD3DSIH_DEFB) { struct flat_constant_def *def; @@ -1779,7 +1779,7 @@ static void remove_dead_code(struct vsir_program *program) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_IF: case VKD3DSIH_LOOP: @@ -1799,7 +1799,7 @@ static void remove_dead_code(struct vsir_program *program) { if (depth > 0) { - if (ins->handler_idx != VKD3DSIH_ELSE) + if (ins->opcode != VKD3DSIH_ELSE) --depth; vkd3d_shader_instruction_make_nop(ins); } @@ -1870,14 +1870,14 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; struct vkd3d_shader_src_param *srcs; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_TEX: if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) return VKD3D_ERROR_OUT_OF_MEMORY; memset(srcs, 0, sizeof(*srcs) * 3); - ins->handler_idx = VKD3DSIH_SAMPLE; + ins->opcode = VKD3DSIH_SAMPLE; srcs[0] = ins->src[0]; @@ -1899,13 +1899,42 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr ins->src_count = 3; break; + case VKD3DSIH_TEXLDD: + if (!(srcs = 
shader_src_param_allocator_get(&program->instructions.src_params, 5))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(srcs, 0, sizeof(*srcs) * 5); + + ins->opcode = VKD3DSIH_SAMPLE_GRAD; + + srcs[0] = ins->src[0]; + + srcs[1].reg.type = VKD3DSPR_RESOURCE; + srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; + srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; + srcs[1].reg.idx_count = 2; + srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; + srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; + srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; + + srcs[2].reg.type = VKD3DSPR_SAMPLER; + srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; + srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; + srcs[2].reg.idx_count = 2; + srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; + + srcs[3] = ins->src[2]; + srcs[4] = ins->src[3]; + + ins->src = srcs; + ins->src_count = 5; + break; + case VKD3DSIH_TEXBEM: case VKD3DSIH_TEXBEML: case VKD3DSIH_TEXCOORD: case VKD3DSIH_TEXDEPTH: case VKD3DSIH_TEXDP3: case VKD3DSIH_TEXDP3TEX: - case VKD3DSIH_TEXLDD: case VKD3DSIH_TEXLDL: case VKD3DSIH_TEXM3x2PAD: case VKD3DSIH_TEXM3x2TEX: @@ -1919,7 +1948,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr case VKD3DSIH_TEXREG2RGB: vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " - "Combined sampler instruction %#x.", ins->handler_idx); + "Combined sampler instruction %#x.", ins->opcode); return VKD3D_ERROR_NOT_IMPLEMENTED; default: @@ -2030,7 +2059,7 @@ static bool cf_flattener_copy_instruction(struct cf_flattener *flattener, { struct vkd3d_shader_instruction *dst_ins; - if (instruction->handler_idx == VKD3DSIH_NOP) + if (instruction->opcode == VKD3DSIH_NOP) return true; if (!(dst_ins = cf_flattener_require_space(flattener, 1))) @@ -2245,9 +2274,9 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte * phase instruction, and in all other shader types begins with the first label instruction. 
* Declaring an indexable temp with function scope is not considered a declaration, * because it needs to live inside a function. */ - if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) + if (!after_declarations_section && instruction->opcode != VKD3DSIH_NOP) { - bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP + bool is_function_indexable = instruction->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && instruction->declaration.indexable_temp.has_function_scope; if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) @@ -2260,14 +2289,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte cf_info = flattener->control_flow_depth ? &flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: if (!cf_flattener_copy_instruction(flattener, instruction)) return VKD3D_ERROR_OUT_OF_MEMORY; - if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) + if (instruction->opcode != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) after_declarations_section = false; break; @@ -2662,7 +2691,7 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; unsigned int case_count, j, default_label; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: current_label = label_from_src_param(&ins->src[0]); @@ -2858,7 +2887,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ /* Only phi src/dst SSA values need be converted here. Structurisation may * introduce new cases of undominated SSA use, which will be handled later. 
*/ - if (ins->handler_idx != VKD3DSIH_PHI) + if (ins->opcode != VKD3DSIH_PHI) continue; ++phi_count; @@ -2907,7 +2936,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ for (j = 0; j < ins->src_count; ++j) materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: current_label = label_from_src_param(&ins->src[0]); @@ -3336,7 +3365,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) if (block->label == 0) continue; - switch (block->end->handler_idx) + switch (block->end->opcode) { case VKD3DSIH_RET: shape = "trapezium"; @@ -3478,7 +3507,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; bool finish = false; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_PHI: case VKD3DSIH_SWITCH_MONOLITHIC: @@ -3533,7 +3562,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program if (block->label == 0) continue; - switch (block->end->handler_idx) + switch (block->end->opcode) { case VKD3DSIH_RET: break; @@ -4192,7 +4221,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) structure->u.block = block; /* Generate between zero and two jump instructions. 
*/ - switch (block->end->handler_idx) + switch (block->end->opcode) { case VKD3DSIH_BRANCH: { @@ -5049,7 +5078,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); @@ -5064,7 +5093,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); - TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); + TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); target.instructions[target.ins_count++] = *ins; ++i; if ((ret = vsir_program_structurize_function(program, message_context, @@ -5222,7 +5251,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); @@ -5237,7 +5266,7 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); - TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); + TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); ++i; if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( program, message_context, &i)) < 0) @@ -5641,7 +5670,7 @@ static void vsir_validate_dst_count(struct validation_context *ctx, if (instruction->dst_count != count) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, "Invalid destination count %u for an instruction of type %#x, 
expected %u.", - instruction->dst_count, instruction->handler_idx, count); + instruction->dst_count, instruction->opcode, count); } static void vsir_validate_src_count(struct validation_context *ctx, @@ -5650,7 +5679,7 @@ static void vsir_validate_src_count(struct validation_context *ctx, if (instruction->src_count != count) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, "Invalid source count %u for an instruction of type %#x, expected %u.", - instruction->src_count, instruction->handler_idx, count); + instruction->src_count, instruction->opcode, count); } static bool vsir_validate_src_min_count(struct validation_context *ctx, @@ -5660,7 +5689,7 @@ static bool vsir_validate_src_min_count(struct validation_context *ctx, { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, "Invalid source count %u for an instruction of type %#x, expected at least %u.", - instruction->src_count, instruction->handler_idx, count); + instruction->src_count, instruction->opcode, count); return false; } @@ -5674,7 +5703,7 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, "Invalid source count %u for an instruction of type %#x, expected at most %u.", - instruction->src_count, instruction->handler_idx, count); + instruction->src_count, instruction->opcode, count); return false; } @@ -5701,7 +5730,7 @@ static void vsir_validate_cf_type(struct validation_context *ctx, assert(expected_type != CF_TYPE_UNKNOWN); if (ctx->cf_type != expected_type) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", - instruction->handler_idx, name_from_cf_type(ctx->cf_type)); + instruction->opcode, name_from_cf_type(ctx->cf_type)); } static void vsir_validate_instruction(struct validation_context *ctx) @@ -5718,13 +5747,13 @@ static void vsir_validate_instruction(struct validation_context *ctx) for (i = 0; i < 
instruction->src_count; ++i) vsir_validate_src_param(ctx, &instruction->src[i]); - if (instruction->handler_idx >= VKD3DSIH_INVALID) + if (instruction->opcode >= VKD3DSIH_INVALID) { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", - instruction->handler_idx); + instruction->opcode); } - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_HS_DECLS: case VKD3DSIH_HS_CONTROL_POINT_PHASE: @@ -5733,12 +5762,14 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_dst_count(ctx, instruction, 0); vsir_validate_src_count(ctx, instruction, 0); if (version->type != VKD3D_SHADER_TYPE_HULL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.", - instruction->handler_idx); + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Phase instruction %#x is only valid in a hull shader.", + instruction->opcode); if (ctx->depth != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.", - instruction->handler_idx); - ctx->phase = instruction->handler_idx; + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Phase instruction %#x must appear to top level.", + instruction->opcode); + ctx->phase = instruction->opcode; ctx->dcl_temps_found = false; return; @@ -5812,7 +5843,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) && ctx->phase == VKD3DSIH_INVALID) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Instruction %#x appear before any phase instruction in a hull shader.", - instruction->handler_idx); + instruction->opcode); /* We support two different control flow types in shaders: * block-based, like DXIL and SPIR-V, and structured, like D3DBC @@ -5824,7 +5855,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) * block, but need for that hasn't arisen 
yet, so we don't. */ if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) { - if (instruction->handler_idx == VKD3DSIH_LABEL) + if (instruction->opcode == VKD3DSIH_LABEL) ctx->cf_type = CF_TYPE_BLOCKS; else ctx->cf_type = CF_TYPE_STRUCTURED; @@ -5832,7 +5863,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) { - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_LABEL: if (ctx->inside_block) @@ -5844,20 +5875,22 @@ static void vsir_validate_instruction(struct validation_context *ctx) case VKD3DSIH_BRANCH: case VKD3DSIH_SWITCH_MONOLITHIC: if (!ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", - instruction->handler_idx); + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Invalid instruction %#x outside any block.", + instruction->opcode); ctx->inside_block = false; break; default: if (!ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", - instruction->handler_idx); + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Invalid instruction %#x outside any block.", + instruction->opcode); break; } } - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DCL_TEMPS: vsir_validate_dst_count(ctx, instruction, 0); @@ -5877,7 +5910,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, 1); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_IFC: @@ -5896,7 +5929,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) if 
(ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); else - ctx->blocks[ctx->depth - 1] = instruction->handler_idx; + ctx->blocks[ctx->depth - 1] = instruction->opcode; break; case VKD3DSIH_ENDIF: @@ -5915,7 +5948,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_ENDLOOP: @@ -5934,7 +5967,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, 1); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_ENDREP: @@ -5953,7 +5986,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, 1); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_ENDSWITCH: diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index be50d3b9020..a3cdbe559a7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ %{ +#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 984a4f894f6..524fb8e9b1f 100644 --- 
a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -1752,6 +1752,22 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *bu return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); } +static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_swap(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id, uint32_t op_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadSwap, result_type, + vkd3d_spirv_get_op_scope_subgroup(builder), val_id, op_id); +} + +static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id, uint32_t index_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadBroadcast, result_type, + vkd3d_spirv_get_op_scope_subgroup(builder), val_id, index_id); +} + static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t val_id) { @@ -6831,7 +6847,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, uint32_t function_id, void_id, function_type_id; struct vkd3d_shader_phase *phase; - assert(compiler->phase != instruction->handler_idx); + assert(compiler->phase != instruction->opcode); if (!is_in_default_phase(compiler)) spirv_compiler_leave_shader_phase(compiler); @@ -6843,16 +6859,16 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, vkd3d_spirv_build_op_function(builder, void_id, function_id, SpvFunctionControlMaskNone, function_type_id); - compiler->phase = instruction->handler_idx; + compiler->phase = instruction->opcode; 
spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); - phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + phase = (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) ? &compiler->control_point_phase : &compiler->patch_constant_phase; phase->function_id = function_id; /* The insertion location must be set after the label is emitted. */ phase->function_location = 0; - if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) compiler->emit_default_control_point_phase = instruction->flags; } @@ -7016,7 +7032,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru { static const struct { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; SpvOp spirv_op; } alu_ops[] = @@ -7056,7 +7072,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) { - if (alu_ops[i].handler_idx == instruction->handler_idx) + if (alu_ops[i].opcode == instruction->opcode) return alu_ops[i].spirv_op; } @@ -7065,7 +7081,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru static SpvOp spirv_compiler_map_logical_instruction(const struct vkd3d_shader_instruction *instruction) { - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_AND: return SpvOpLogicalAnd; @@ -7090,20 +7106,20 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) { - val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); } else if (dst->reg.data_type == VKD3D_DATA_DOUBLE) { /* ITOD is not supported. 
Frontends which emit bool casts must use ITOF for double. */ - val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); } else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) { - val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); + val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); } else if (dst->reg.data_type == VKD3D_DATA_UINT64) { - val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); + val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); } else { @@ -7126,7 +7142,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil SpvOp op = SpvOpMax; unsigned int i; - if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) + if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->opcode == VKD3DSIH_COUNTBITS) { /* At least some drivers support this anyway, but if validation is enabled it will fail. */ FIXME("Unsupported 64-bit source for bit count.\n"); @@ -7142,8 +7158,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil /* VSIR supports logic ops AND/OR/XOR on bool values. 
*/ op = spirv_compiler_map_logical_instruction(instruction); } - else if (instruction->handler_idx == VKD3DSIH_ITOF || instruction->handler_idx == VKD3DSIH_UTOF - || instruction->handler_idx == VKD3DSIH_ITOI || instruction->handler_idx == VKD3DSIH_UTOU) + else if (instruction->opcode == VKD3DSIH_ITOF || instruction->opcode == VKD3DSIH_UTOF + || instruction->opcode == VKD3DSIH_ITOI || instruction->opcode == VKD3DSIH_UTOU) { /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. */ @@ -7158,9 +7174,9 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil if (op == SpvOpMax) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, - "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); + "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); return VKD3D_ERROR_INVALID_SHADER; } @@ -7179,8 +7195,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil * Microsoft fxc will compile immediate constants larger than 5 bits. * Fixing up the constants would be more elegant, but the simplest way is * to let this handle constants too. 
*/ - if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->handler_idx == VKD3DSIH_ISHL - || instruction->handler_idx == VKD3DSIH_ISHR || instruction->handler_idx == VKD3DSIH_USHR)) + if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->opcode == VKD3DSIH_ISHL + || instruction->opcode == VKD3DSIH_ISHR || instruction->opcode == VKD3DSIH_USHR)) { uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); @@ -7218,7 +7234,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( { static const struct { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; enum GLSLstd450 glsl_inst; } glsl_insts[] = @@ -7258,7 +7274,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) { - if (glsl_insts[i].handler_idx == instruction->handler_idx) + if (glsl_insts[i].opcode == instruction->opcode) return glsl_insts[i].glsl_inst; } @@ -7276,20 +7292,20 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp unsigned int i, component_count; enum GLSLstd450 glsl_inst; - if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI - || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) + if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->opcode == VKD3DSIH_FIRSTBIT_HI + || instruction->opcode == VKD3DSIH_FIRSTBIT_LO || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI)) { /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ - FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); + FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->opcode); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, - "64-bit source for handler %#x is not supported.", instruction->handler_idx); + "64-bit source for handler %#x is not supported.", instruction->opcode); return; } glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); if (glsl_inst == GLSLstd450Bad) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -7306,8 +7322,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, instr_set_id, glsl_inst, src_id, instruction->src_count); - if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI - || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) + if (instruction->opcode == VKD3DSIH_FIRSTBIT_HI + || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI) { /* In D3D bits are numbered from the most significant bit. 
*/ component_count = vsir_write_mask_component_count(dst->write_mask); @@ -7415,7 +7431,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, if (src[0].reg.data_type != VKD3D_DATA_BOOL) { - if (instruction->handler_idx == VKD3DSIH_CMP) + if (instruction->opcode == VKD3DSIH_CMP) condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); @@ -7469,9 +7485,9 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, component_count = vsir_write_mask_component_count(dst->write_mask); component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - if (instruction->handler_idx == VKD3DSIH_DP4) + if (instruction->opcode == VKD3DSIH_DP4) write_mask = VKD3DSP_WRITEMASK_ALL; - else if (instruction->handler_idx == VKD3DSIH_DP3) + else if (instruction->opcode == VKD3DSIH_DP3) write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; else write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; @@ -7606,8 +7622,8 @@ static void spirv_compiler_emit_int_div(struct spirv_compiler *compiler, unsigned int component_count = 0; SpvOp div_op, mod_op; - div_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; - mod_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; + div_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; + mod_op = instruction->opcode == VKD3DSIH_IDIV ? 
SpvOpSRem : SpvOpUMod; if (dst[0].reg.type != VKD3DSPR_NULL) { @@ -7778,13 +7794,13 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp mask_id = spirv_compiler_get_constant_uint(compiler, size - 1); size_id = spirv_compiler_get_constant_uint(compiler, size); - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; default: - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -7895,7 +7911,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co unsigned int component_count; SpvOp op; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DEQO: case VKD3DSIH_EQO: op = SpvOpFOrdEqual; break; @@ -7916,7 +7932,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; case VKD3DSIH_ULT: op = SpvOpULessThan; break; default: - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -7949,7 +7965,7 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); - if (instruction->handler_idx == VKD3DSIH_ORD) + if (instruction->opcode == VKD3DSIH_ORD) val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -7964,7 +7980,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil unsigned int component_count; SpvOp op; - switch 
(instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; @@ -8262,7 +8278,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile static const struct instruction_info { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; SpvOp op; bool needs_derivative_control; } @@ -8279,7 +8295,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile info = NULL; for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) { - if (deriv_instructions[i].handler_idx == instruction->handler_idx) + if (deriv_instructions[i].opcode == instruction->opcode) { info = &deriv_instructions[i]; break; @@ -8287,7 +8303,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile } if (!info) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -8497,7 +8513,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, uint32_t coordinate_mask; bool multisample; - multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; + multisample = instruction->opcode == VKD3DSIH_LD2DMS; spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); @@ -8576,7 +8592,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, spirv_compiler_prepare_image(compiler, &image, &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_SAMPLE: op = SpvOpImageSampleImplicitLod; @@ -8603,7 +8619,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, &src[3], VKD3DSP_WRITEMASK_0); break; default: - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -8637,7 +8653,7 @@ static void 
spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, uint32_t image_operands[2]; SpvOp op; - if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) + if (instruction->opcode == VKD3DSIH_SAMPLE_C_LZ) { op = SpvOpImageSampleDrefExplicitLod; operands_mask |= SpvImageOperandsLodMask; @@ -8687,12 +8703,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, uint32_t coordinate_mask; bool extended_offset; - if (instruction->handler_idx == VKD3DSIH_GATHER4_C - || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C) + if (instruction->opcode == VKD3DSIH_GATHER4_C + || instruction->opcode == VKD3DSIH_GATHER4_PO_C) image_flags |= VKD3D_IMAGE_FLAG_DEPTH; - extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO - || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; + extended_offset = instruction->opcode == VKD3DSIH_GATHER4_PO + || instruction->opcode == VKD3DSIH_GATHER4_PO_C; addr = &src[0]; offset = extended_offset ? &src[1] : NULL; @@ -8963,7 +8979,6 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * { type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); - assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9007,7 +9022,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); - assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], 
VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9145,7 +9159,7 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c uint32_t operands[3]; SpvOp op; - op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC + op = instruction->opcode == VKD3DSIH_IMM_ATOMIC_ALLOC ? SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); @@ -9211,7 +9225,7 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins { static const struct { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; SpvOp spirv_op; } atomic_ops[] = @@ -9240,16 +9254,16 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) { - if (atomic_ops[i].handler_idx == instruction->handler_idx) + if (atomic_ops[i].opcode == instruction->opcode) return atomic_ops[i].spirv_op; } return SpvOpMax; } -static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) +static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode opcode) { - return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; + return VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR; } static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compiler, @@ -9274,12 +9288,12 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil bool raw; SpvOp op; - resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; + resource = is_imm_atomic_instruction(instruction->opcode) ? 
&dst[1] : &dst[0]; op = spirv_compiler_map_atomic_instruction(instruction); if (op == SpvOpMax) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -9360,7 +9374,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil { WARN("Ignoring 'volatile' attribute.\n"); spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, - "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); + "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->opcode); } memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) @@ -9379,7 +9393,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, op, type_id, operands, i); - if (is_imm_atomic_instruction(instruction->handler_idx)) + if (is_imm_atomic_instruction(instruction->opcode)) spirv_compiler_emit_store_dst(compiler, dst, result_id); } @@ -9684,13 +9698,13 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, src_ids[src_count++] = register_info.id; - if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) + if (instruction->opcode == VKD3DSIH_EVAL_CENTROID) { op = GLSLstd450InterpolateAtCentroid; } else { - assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); + assert(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); op = GLSLstd450InterpolateAtSample; src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); } @@ -9772,7 +9786,7 @@ static void spirv_compiler_emit_emit_stream(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int stream_idx; - if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) + if (instruction->opcode == VKD3DSIH_EMIT_STREAM) stream_idx = instruction->src[0].reg.idx[0].offset; else 
stream_idx = 0; @@ -9793,7 +9807,7 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int stream_idx; - if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) + if (instruction->opcode == VKD3DSIH_CUT_STREAM) stream_idx = instruction->src[0].reg.idx[0].offset; else stream_idx = 0; @@ -9807,9 +9821,68 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); } -static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) +static uint32_t map_quad_read_across_direction(enum vkd3d_shader_opcode opcode) { - switch (handler_idx) + switch (opcode) + { + case VKD3DSIH_QUAD_READ_ACROSS_X: + return 0; + case VKD3DSIH_QUAD_READ_ACROSS_Y: + return 1; + case VKD3DSIH_QUAD_READ_ACROSS_D: + return 2; + default: + vkd3d_unreachable(); + } +} + +static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, direction_type_id, direction_id, val_id; + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); + direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VKD3D_DATA_UINT, 1); + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + direction_id = map_quad_read_across_direction(instruction->opcode); + direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); + val_id = vkd3d_spirv_build_op_group_nonuniform_quad_swap(builder, type_id, val_id, direction_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler 
*compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id, lane_id; + + if (!register_is_constant_or_undef(&src[1].reg)) + { + FIXME("Unsupported non-constant quad read lane index.\n"); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Non-constant quad read lane indices are not supported."); + return; + } + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); + val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); + val_id = vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode opcode) +{ + switch (opcode) { case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: return SpvOpGroupNonUniformAllEqual; @@ -9833,7 +9906,7 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); - op = map_wave_bool_op(instruction->handler_idx); + op = map_wave_bool_op(instruction->opcode); type_id = vkd3d_spirv_get_op_type_bool(builder); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, @@ -9865,9 +9938,9 @@ static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compil spirv_compiler_emit_store_dst(compiler, dst, val_id); } -static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) +static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode opcode, bool is_float) { - switch (handler_idx) + switch 
(opcode) { case VKD3DSIH_WAVE_ACTIVE_BIT_AND: return SpvOpGroupNonUniformBitwiseAnd; @@ -9905,7 +9978,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, uint32_t type_id, val_id; SpvOp op; - op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); + op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type)); type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)); @@ -9928,7 +10001,7 @@ static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, SpvGroupOperation group_op; uint32_t type_id, val_id; - group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan + group_op = (instruction->opcode == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan : SpvGroupOperationReduce; val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); @@ -10014,7 +10087,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, compiler->location = instruction->location; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DCL_GLOBAL_FLAGS: spirv_compiler_emit_dcl_global_flags(compiler, instruction); @@ -10337,6 +10410,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CUT_STREAM: spirv_compiler_emit_cut_stream(compiler, instruction); break; + case VKD3DSIH_QUAD_READ_ACROSS_D: + case VKD3DSIH_QUAD_READ_ACROSS_X: + case VKD3DSIH_QUAD_READ_ACROSS_Y: + spirv_compiler_emit_quad_read_across(compiler, instruction); + break; + case VKD3DSIH_QUAD_READ_LANE_AT: + spirv_compiler_emit_quad_read_lane_at(compiler, instruction); + break; case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: case VKD3DSIH_WAVE_ALL_TRUE: case VKD3DSIH_WAVE_ANY_TRUE: @@ -10390,9 +10471,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, 
/* nothing to do */ break; default: - FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); + FIXME("Unhandled instruction %#x.\n", instruction->opcode); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, - "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); + "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index b562e815a81..a7c37215e5e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -780,7 +780,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) { FIXME("Ignoring shader data type %#x.\n", type); - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; return; } @@ -789,7 +789,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui if (icb_size % 4) { FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } @@ -797,7 +797,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui { ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } icb->register_idx = 0; @@ -2395,16 +2395,16 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str if (!(opcode_info = get_info_from_sm4_opcode(&sm4->lookup, opcode))) { FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; *ptr += len; return; } vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); - if 
(ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE - || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) - sm4->phase = ins->handler_idx; - sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; + if (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->opcode == VKD3DSIH_HS_FORK_PHASE + || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) + sm4->phase = ins->opcode; + sm4->has_control_point_phase |= ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE; ins->flags = 0; ins->coissue = false; ins->raw = false; @@ -2417,7 +2417,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str { ERR("Failed to allocate src parameters.\n"); vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; @@ -2459,7 +2459,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str { ERR("Failed to allocate dst parameters.\n"); vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } for (i = 0; i < ins->dst_count; ++i) @@ -2467,7 +2467,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), &dst_params[i]))) { - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } dst_params[i].modifiers |= instruction_dst_modifier; @@ -2478,7 +2478,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), &src_params[i]))) { - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } } @@ -2488,7 +2488,7 @@ static void 
shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str fail: *ptr = sm4->end; - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } @@ -2693,7 +2693,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con ins = &instructions->elements[instructions->count]; shader_sm4_read_instruction(&sm4, ins); - if (ins->handler_idx == VKD3DSIH_INVALID) + if (ins->opcode == VKD3DSIH_INVALID) { WARN("Encountered unrecognized or invalid instruction.\n"); vsir_program_cleanup(program); @@ -2762,6 +2762,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, + {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, }; for (i = 0; i < ARRAY_SIZE(register_table); ++i) @@ -2817,6 +2818,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, @@ -2984,11 +2986,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) return D3D_SVC_VECTOR; case HLSL_CLASS_ARRAY: + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_STRUCT: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: @@ -2997,6 +3001,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const 
struct hlsl_type *type) case HLSL_CLASS_UAV: case HLSL_CLASS_VERTEX_SHADER: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: break; } vkd3d_unreachable(); @@ -3178,13 +3183,14 @@ struct extern_resource /* var is only not NULL if this resource is a whole variable, so it may be responsible for more * than one component. */ const struct hlsl_ir_var *var; + const struct hlsl_buffer *buffer; char *name; struct hlsl_type *data_type; bool is_user_packed; enum hlsl_regset regset; - unsigned int id, bind_count; + unsigned int id, space, index, bind_count; }; static int sm4_compare_extern_resources(const void *a, const void *b) @@ -3196,7 +3202,10 @@ static int sm4_compare_extern_resources(const void *a, const void *b) if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) return r; - return vkd3d_u32_compare(aa->id, bb->id); + if ((r = vkd3d_u32_compare(aa->space, bb->space))) + return r; + + return vkd3d_u32_compare(aa->index, bb->index); } static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) @@ -3220,6 +3229,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; + struct hlsl_buffer *buffer; enum hlsl_regset regset; size_t capacity = 0; char *name; @@ -3272,13 +3282,16 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un hlsl_release_string_buffer(ctx, name_buffer); extern_resources[*count].var = NULL; + extern_resources[*count].buffer = NULL; extern_resources[*count].name = name; extern_resources[*count].data_type = component_type; extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id + regset_offset; + extern_resources[*count].id = var->regs[regset].id; + 
extern_resources[*count].space = var->regs[regset].space; + extern_resources[*count].index = var->regs[regset].index + regset_offset; extern_resources[*count].bind_count = 1; ++*count; @@ -3313,13 +3326,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un } extern_resources[*count].var = var; + extern_resources[*count].buffer = NULL; extern_resources[*count].name = name; extern_resources[*count].data_type = var->data_type; - extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + /* For some reason 5.1 resources aren't marked as + * user-packed, but cbuffers still are. */ + extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) + && !!var->reg_reservation.reg_type; extern_resources[*count].regset = r; extern_resources[*count].id = var->regs[r].id; + extern_resources[*count].space = var->regs[r].space; + extern_resources[*count].index = var->regs[r].index; extern_resources[*count].bind_count = var->bind_count[r]; ++*count; @@ -3327,14 +3346,51 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un } } + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!buffer->reg.allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name = hlsl_strdup(ctx, buffer->name))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = NULL; + extern_resources[*count].buffer = buffer; + + extern_resources[*count].name = name; + extern_resources[*count].data_type = NULL; + extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; + + extern_resources[*count].regset = HLSL_REGSET_NUMERIC; + extern_resources[*count].id = buffer->reg.id; + extern_resources[*count].space = buffer->reg.space; + 
extern_resources[*count].index = buffer->reg.index; + extern_resources[*count].bind_count = 1; + + ++*count; + } + qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources); return extern_resources; } static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { - unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; + uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + unsigned int cbuffer_count = 0, extern_resources_count, i, j; size_t cbuffer_position, resource_position, creator_position; const struct hlsl_profile_info *profile = ctx->profile; struct vkd3d_bytecode_buffer buffer = {0}; @@ -3354,19 +3410,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - resource_count += extern_resources_count; LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - { ++cbuffer_count; - ++resource_count; - } } put_u32(&buffer, cbuffer_count); cbuffer_position = put_u32(&buffer, 0); - put_u32(&buffer, resource_count); + put_u32(&buffer, extern_resources_count); resource_position = put_u32(&buffer, 0); put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), target_types[profile->type])); @@ -3378,7 +3430,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, binding_desc_size); /* size of binding desc */ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ @@ -3395,21 +3447,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0; - if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); - if (resource->is_user_packed) flags |= D3D_SIF_USERPACKED; put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(resource->data_type)); - if (resource->regset == HLSL_REGSET_SAMPLERS) - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); - put_u32(&buffer, 0); - } + if (resource->buffer) + put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); else + put_u32(&buffer, sm4_resource_type(resource->data_type)); + if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) { unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; @@ -3418,32 +3464,21 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } - put_u32(&buffer, resource->id); + else + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); + put_u32(&buffer, 0); + } + put_u32(&buffer, resource->index); put_u32(&buffer, resource->bind_count); put_u32(&buffer, flags); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - uint32_t flags = 0; - - if (!cbuffer->reg.allocated) - continue; if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); - - if 
(cbuffer->reservation.reg_type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - put_u32(&buffer, 0); /* return type */ - put_u32(&buffer, 0); /* dimension */ - put_u32(&buffer, 0); /* multisample count */ - put_u32(&buffer, cbuffer->reg.id); /* bind point */ - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); /* flags */ + { + put_u32(&buffer, resource->space); + put_u32(&buffer, resource->id); + } } for (i = 0; i < extern_resources_count; ++i) @@ -3451,16 +3486,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) const struct extern_resource *resource = &extern_resources[i]; string_offset = put_string(&buffer, resource->name); - set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); + set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); } /* Buffers. 
*/ @@ -3522,7 +3548,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); put_u32(&buffer, flags); put_u32(&buffer, 0); /* type */ - put_u32(&buffer, 0); /* FIXME: default value */ + put_u32(&buffer, 0); /* default value */ if (profile->major_version >= 5) { @@ -3546,6 +3572,34 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) set_u32(&buffer, var_offset, string_offset); write_sm4_type(ctx, &buffer, var->data_type); set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + + if (var->default_values) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + unsigned int k; + + default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); + set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); + + for (k = 0; k < comp_count; ++k) + { + struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); + unsigned int comp_offset; + enum hlsl_regset regset; + + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, &regset); + if (regset == HLSL_REGSET_NUMERIC) + { + if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) + hlsl_fixme(ctx, &var->loc, "Write double default values."); + + set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), + var->default_values[k].value.u); + } + } + } ++j; } } @@ -3720,30 +3774,57 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { reg->type = VKD3DSPR_RESOURCE; reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; +
reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } assert(regset == HLSL_REGSET_TEXTURES); - reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_UAVS) { reg->type = VKD3DSPR_UAV; reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } assert(regset == HLSL_REGSET_UAVS); - reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_SAMPLERS) { reg->type = VKD3DSPR_SAMPLER; reg->dimension = VSIR_DIMENSION_NONE; - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } assert(regset == HLSL_REGSET_SAMPLERS); - reg->idx_count = 1; *writemask = VKD3DSP_WRITEMASK_ALL; } else @@ -3753,9 +3834,19 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re assert(data_type->class <= HLSL_CLASS_VECTOR); reg->type = VKD3DSPR_CONSTBUFFER; reg->dimension = VSIR_DIMENSION_VEC4; - 
reg->idx[0].offset = var->buffer->reg.id; - reg->idx[1].offset = offset / 4; - reg->idx_count = 2; + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ + reg->idx[2].offset = offset / 4; + reg->idx_count = 3; + } + else + { + reg->idx[0].offset = var->buffer->reg.index; + reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + } *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); } } @@ -4139,18 +4230,36 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) { - const struct sm4_instruction instr = + size_t size = (cbuffer->used_size + 3) / 4; + + struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, - .srcs[0].reg.idx[0].offset = cbuffer->reg.id, - .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, - .srcs[0].reg.idx_count = 2, .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, .src_count = 1, }; + + if (hlsl_version_ge(tpf->ctx, 5, 1)) + { + instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; + instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; + instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ + instr.srcs[0].reg.idx_count = 3; + + instr.idx[0] = size; + instr.idx[1] = cbuffer->reg.space; + instr.idx_count = 2; + } + else + { + instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; + instr.srcs[0].reg.idx[1].offset = size; + instr.srcs[0].reg.idx_count = 2; + } + write_sm4_instruction(tpf, &instr); } @@ -4163,7 +4272,6 @@ static void write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct ex .opcode = VKD3D_SM4_OP_DCL_SAMPLER, .dsts[0].reg.type = VKD3DSPR_SAMPLER, - .dsts[0].reg.idx_count = 1, .dst_count = 1, }; @@ -4179,7 +4287,22 @@ static void write_sm4_dcl_samplers(const 
struct tpf_writer *tpf, const struct ex if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) continue; - instr.dsts[0].reg.idx[0].offset = resource->id + i; + if (hlsl_version_ge(tpf->ctx, 5, 1)) + { + assert(!i); + instr.dsts[0].reg.idx[0].offset = resource->id; + instr.dsts[0].reg.idx[1].offset = resource->index; + instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ + instr.dsts[0].reg.idx_count = 3; + + instr.idx[0] = resource->space; + instr.idx_count = 1; + } + else + { + instr.dsts[0].reg.idx[0].offset = resource->index + i; + instr.dsts[0].reg.idx_count = 1; + } write_sm4_instruction(tpf, &instr); } } @@ -4212,6 +4335,23 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex .idx_count = 1, }; + if (hlsl_version_ge(tpf->ctx, 5, 1)) + { + assert(!i); + instr.dsts[0].reg.idx[0].offset = resource->id; + instr.dsts[0].reg.idx[1].offset = resource->index; + instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ + instr.dsts[0].reg.idx_count = 3; + + instr.idx[1] = resource->space; + instr.idx_count = 2; + } + else + { + instr.dsts[0].reg.idx[0].offset = resource->index + i; + instr.dsts[0].reg.idx_count = 1; + } + if (uav) { switch (resource->data_type->sampler_dim) @@ -4904,6 +5044,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct write_sm4_instruction(tpf, &instr); } +static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; + instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; + instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; + instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + instr.src_count = 1; + + 
write_sm4_instruction(tpf, &instr); +} + static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) { const struct hlsl_ir_node *arg1 = expr->operands[0].node; @@ -4919,6 +5078,14 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex switch (expr->op) { + case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: + if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) + write_sm4_rasterizer_sample_count(tpf, &expr->node); + else + hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); + break; + case HLSL_OP1_ABS: switch (dst_type->e.numeric.type) { @@ -5799,21 +5966,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - { - if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); - write_sm4_dcl_constant_buffer(&tpf, cbuffer); - } } for (i = 0; i < extern_resources_count; ++i) { const struct extern_resource *resource = &extern_resources[i]; - if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); - if (resource->regset == HLSL_REGSET_SAMPLERS) write_sm4_dcl_samplers(&tpf, resource); else if (resource->regset == HLSL_REGSET_TEXTURES) @@ -5875,7 +6034,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); for (unsigned int i = 0; i < extern_resources_count; ++i) { - if (extern_resources[i].data_type->e.resource.rasteriser_ordered) + if (extern_resources[i].data_type && extern_resources[i].data_type->e.resource.rasteriser_ordered) *flags |= VKD3D_SM4_REQUIRES_ROVS; } sm4_free_extern_resources(extern_resources, extern_resources_count); diff --git 
a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 14a3fa778e5..fdbde019111 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -847,12 +849,13 @@ static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_contex static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) - || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) - || handler_idx == VKD3DSIH_LD_UAV_TYPED - || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) - || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); + enum vkd3d_shader_opcode opcode = instruction->opcode; + + return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) + || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR) + || opcode == VKD3DSIH_LD_UAV_TYPED + || (opcode == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) + || (opcode == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); } static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, @@ -863,9 +866,9 @@ static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context * static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC - || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; + enum vkd3d_shader_opcode opcode = 
instruction->opcode; + + return opcode == VKD3DSIH_IMM_ATOMIC_ALLOC || opcode == VKD3DSIH_IMM_ATOMIC_CONSUME; } static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, @@ -876,9 +879,10 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_contex static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) - || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR); + enum vkd3d_shader_opcode opcode = instruction->opcode; + + return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) + || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR); } static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, @@ -1130,7 +1134,7 @@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte context->location = instruction->location; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DCL_CONSTANT_BUFFER: vkd3d_shader_scan_constant_buffer_declaration(context, instruction); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 29b8d6ad022..96e613669a6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -151,6 +151,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, + VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, + VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -455,6 +457,10 @@ 
enum vkd3d_shader_opcode VKD3DSIH_PHASE, VKD3DSIH_PHI, VKD3DSIH_POW, + VKD3DSIH_QUAD_READ_ACROSS_D, + VKD3DSIH_QUAD_READ_ACROSS_X, + VKD3DSIH_QUAD_READ_ACROSS_Y, + VKD3DSIH_QUAD_READ_LANE_AT, VKD3DSIH_RCP, VKD3DSIH_REP, VKD3DSIH_RESINFO, @@ -805,6 +811,7 @@ enum vkd3d_tessellator_domain #define VKD3DSI_NONE 0x0 #define VKD3DSI_TEXLD_PROJECT 0x1 +#define VKD3DSI_TEXLD_BIAS 0x2 #define VKD3DSI_INDEXED_DYNAMIC 0x4 #define VKD3DSI_RESINFO_RCP_FLOAT 0x1 #define VKD3DSI_RESINFO_UINT 0x2 @@ -1189,7 +1196,7 @@ struct vkd3d_shader_location struct vkd3d_shader_instruction { struct vkd3d_shader_location location; - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; uint32_t flags; unsigned int dst_count; unsigned int src_count; @@ -1238,8 +1245,8 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns return v->major < major || (v->major == major && v->minor <= minor); } -void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx); +void vsir_instruction_init(struct vkd3d_shader_instruction *ins, + const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode); static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) { diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 95366d3441b..2354938c08d 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -2025,7 +2025,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct vkd3d_vulkan_info *vk_info, - VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) + VkAccessFlags *access_mask, VkPipelineStageFlags 
*stage_flags, VkImageLayout *image_layout, + struct d3d12_device *device) { bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); VkPipelineStageFlags queue_shader_stages = 0; @@ -2033,10 +2034,12 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) { queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT - | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT - | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT - | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + if (device->vk_info.geometry_shaders) + queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; + if (device->vk_info.tessellation_shaders) + queue_shader_stages |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT + | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; } if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; @@ -2054,7 +2057,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, { if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, - resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); + resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout, device); *access_mask = VK_ACCESS_MEMORY_READ_BIT; *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; @@ -2251,7 +2254,8 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, - resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) + resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, + &dst_stage_mask, &barrier.newLayout, list->device)) { 
FIXME("Unhandled state %#x.\n", resource->initial_state); return; @@ -4277,13 +4281,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC } if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, - resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) + resource, list->vk_queue_flags, vk_info, &src_access_mask, + &src_stage_mask, &layout_before, list->device)) { FIXME("Unhandled state %#x.\n", state_before); continue; } if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, - resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) + resource, list->vk_queue_flags, vk_info, &dst_access_mask, + &dst_stage_mask, &layout_after, list->device)) { FIXME("Unhandled state %#x.\n", state_after); continue; @@ -4303,7 +4309,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC resource = unsafe_impl_from_ID3D12Resource(uav->pResource); vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, - resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); + resource, list->vk_queue_flags, vk_info, &access_mask, + &stage_mask, &image_layout, list->device); src_access_mask = dst_access_mask = access_mask; src_stage_mask = dst_stage_mask = stage_mask; layout_before = layout_after = image_layout; @@ -4814,15 +4821,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi return; } - if (!views) - { - WARN("NULL \"views\" pointer specified.\n"); - return; - } - for (i = 0; i < view_count; ++i) { - if (views[i].BufferLocation) + if (views && views[i].BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); buffers[i] = resource->u.vk_buffer; @@ -5434,6 +5435,52 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 } } 
+static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const struct vkd3d_resource_view *view, + struct d3d12_resource *resource, VkClearColorValue *colour) +{ + struct vkd3d_texture_view_desc view_desc; + const struct vkd3d_format *uint_format; + struct vkd3d_view *uint_view; + + if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) + && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, colour))) + { + ERR("Unhandled format %#x.\n", view->format->dxgi_format); + return NULL; + } + + if (d3d12_resource_is_buffer(resource)) + { + if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_buffer, + uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) + { + ERR("Failed to create buffer view.\n"); + return NULL; + } + + return uint_view; + } + + memset(&view_desc, 0, sizeof(view_desc)); + view_desc.view_type = view->info.texture.vk_view_type; + view_desc.format = uint_format; + view_desc.miplevel_idx = view->info.texture.miplevel_idx; + view_desc.miplevel_count = 1; + view_desc.layer_idx = view->info.texture.layer_idx; + view_desc.layer_count = view->info.texture.layer_count; + view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; + + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, + resource->u.vk_image, &view_desc, &uint_view)) + { + ERR("Failed to create image view.\n"); + return NULL; + } + + return uint_view; +} + static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) @@ -5441,8 +5488,6 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); struct 
vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; - struct vkd3d_texture_view_desc view_desc; - const struct vkd3d_format *uint_format; const struct vkd3d_resource_view *view; struct d3d12_resource *resource_impl; VkClearColorValue colour; @@ -5456,44 +5501,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID view = &descriptor->v; memcpy(colour.uint32, values, sizeof(colour.uint32)); - if (view->format->type != VKD3D_FORMAT_TYPE_UINT) + if (view->format->type != VKD3D_FORMAT_TYPE_UINT + && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) { - if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) - && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) - { - ERR("Unhandled format %#x.\n", view->format->dxgi_format); - return; - } - - if (d3d12_resource_is_buffer(resource_impl)) - { - if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, - uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) - { - ERR("Failed to create buffer view.\n"); - return; - } - } - else - { - memset(&view_desc, 0, sizeof(view_desc)); - view_desc.view_type = view->info.texture.vk_view_type; - view_desc.format = uint_format; - view_desc.miplevel_idx = view->info.texture.miplevel_idx; - view_desc.miplevel_count = 1; - view_desc.layer_idx = view->info.texture.layer_idx; - view_desc.layer_count = view->info.texture.layer_count; - view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; - - if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, - &uint_view)) - { - ERR("Failed to create image view.\n"); - return; - } - } - descriptor = uint_view; + ERR("Failed to create UINT view.\n"); + return; } d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, 
rect_count, rects); @@ -5507,19 +5519,32 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I const float values[4], UINT rect_count, const D3D12_RECT *rects) { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; + struct d3d12_device *device = list->device; + const struct vkd3d_resource_view *view; struct d3d12_resource *resource_impl; VkClearColorValue colour; - struct vkd3d_view *view; TRACE("iface %p, gpu_handle %s, cpu_handle %s, resource %p, values %p, rect_count %u, rects %p.\n", iface, debug_gpu_handle(gpu_handle), debug_cpu_handle(cpu_handle), resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); - if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) return; + view = &descriptor->v; memcpy(colour.float32, values, sizeof(colour.float32)); - d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); + if (view->format->type == VKD3D_FORMAT_TYPE_SINT + && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) + { + ERR("Failed to create UINT view.\n"); + return; + } + + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); + + if (uint_view) + vkd3d_view_decref(uint_view, device); } static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index cfc9c5f5ed3..2bbc170504e 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -76,6 +76,14 @@ static const char * const required_device_extensions[] = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, }; +/* In general we don't want to enable Vulkan beta extensions, but make an + * exception for VK_KHR_portability_subset because we draw no real feature 
from + * it, but it's still useful to be able to develop for MoltenVK without being + * spammed with validation errors. */ +#ifndef VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME +#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" +#endif + static const struct vkd3d_optional_extension_info optional_device_extensions[] = { /* KHR extensions */ @@ -85,6 +93,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2), VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), + VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset), VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), @@ -92,7 +101,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), - VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), @@ -1634,6 +1643,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->device_limits = physical_device_info->properties2.properties.limits; vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; + vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; + vulkan_info->tessellation_shaders = physical_device_info->features2.features.tessellationShader; vulkan_info->sparse_binding = features->sparseBinding; 
vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; @@ -3806,7 +3817,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 return E_INVALIDARG; } - data->UnalignedBlockTexturesSupported = FALSE; + /* Vulkan does not restrict block texture alignment. */ + data->UnalignedBlockTexturesSupported = TRUE; TRACE("Unaligned block texture support %#x.\n", data->UnalignedBlockTexturesSupported); return S_OK; diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index c897d9f2c5a..7d7f40c0953 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -1809,14 +1809,6 @@ static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC1 *d return false; } - if (align(desc->Width, format->block_width) != desc->Width - || align(desc->Height, format->block_height) != desc->Height) - { - WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", - desc->Width, desc->Height, desc->Format); - return false; - } - return true; } @@ -4357,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript return hr; descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); - d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) + { + vkd3d_private_store_destroy(&descriptor_heap->private_store); + return hr; + } vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); d3d12_device_add_ref(descriptor_heap->device = device); diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index d1fa866d9e3..7acd39d65be 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -123,6 +123,7 
@@ struct vkd3d_vulkan_info bool KHR_image_format_list; bool KHR_maintenance2; bool KHR_maintenance3; + bool KHR_portability_subset; bool KHR_push_descriptor; bool KHR_sampler_mirror_clamp_to_edge; bool KHR_timeline_semaphore; @@ -145,6 +146,8 @@ struct vkd3d_vulkan_info bool rasterization_stream; bool transform_feedback_queries; + bool geometry_shaders; + bool tessellation_shaders; bool uav_read_without_format; -- 2.43.0