From d4483ee98c1c4e9d51b6e7c82eafc991dc1ead80 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 5 Nov 2024 11:33:35 +1100 Subject: [PATCH] Updated vkd3d-latest patchset --- ...-03ad04c89004c7f800c5b1a0ea7ba286229.patch | 2 +- ...-4889c71857ce2152a9c9e014b9f5831f96d.patch | 2 +- ...-91701f83035c0d67d1ab917e0f6b73f91e8.patch | 2 +- ...-5eff8bf9188c401cc31ce14d42798dc3751.patch | 2 +- ...-ad2208b726f825305f69d099790208e4e4f.patch | 2 +- ...-794f4c30f4873841aaa0c5c9745ee732437.patch | 1338 +++++++++++++++++ 6 files changed, 1343 insertions(+), 5 deletions(-) create mode 100644 patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch diff --git a/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch b/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch index d386bc74..d710cb28 100644 --- a/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch +++ b/patches/vkd3d-latest/0001-Updated-vkd3d-to-03ad04c89004c7f800c5b1a0ea7ba286229.patch @@ -1,4 +1,4 @@ -From fce7894de005355e8e6b8ac475aeaf20e9c1e369 Mon Sep 17 00:00:00 2001 +From e81da48e8f4744caef2432dcf8044cbc5c5f2efb Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 3 Sep 2024 07:18:49 +1000 Subject: [PATCH] Updated vkd3d to 03ad04c89004c7f800c5b1a0ea7ba28622916328. diff --git a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch index fa82a125..b8b2653c 100644 --- a/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch +++ b/patches/vkd3d-latest/0002-Updated-vkd3d-to-4889c71857ce2152a9c9e014b9f5831f96d.patch @@ -1,4 +1,4 @@ -From f834612a4a88cdacebe7ab1327beb71d2ba2fd5c Mon Sep 17 00:00:00 2001 +From b74ccacc75f28c2fa4896b758df9f479ac3e2399 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 23 Oct 2024 13:50:07 +1100 Subject: [PATCH] Updated vkd3d to 4889c71857ce2152a9c9e014b9f5831f96dc349b. diff --git a/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch b/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch index 5fdad889..b39da998 100644 --- a/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch +++ b/patches/vkd3d-latest/0003-Updated-vkd3d-to-91701f83035c0d67d1ab917e0f6b73f91e8.patch @@ -1,4 +1,4 @@ -From 00aa0577c99eb55b93f4b67d42baecdc9ff9f5d2 Mon Sep 17 00:00:00 2001 +From 2fa55f52e1789b6f6d894ed810467c58de169df1 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 24 Oct 2024 07:08:51 +1100 Subject: [PATCH] Updated vkd3d to 91701f83035c0d67d1ab917e0f6b73f91e8583d4. diff --git a/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch b/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch index 807a70ed..f3652284 100644 --- a/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch +++ b/patches/vkd3d-latest/0004-Updated-vkd3d-to-5eff8bf9188c401cc31ce14d42798dc3751.patch @@ -1,4 +1,4 @@ -From 4cffbfb94f89317c6e5ce5f7c6aa488e67a8451d Mon Sep 17 00:00:00 2001 +From 77d31a307f455b130cde7abaade54f37ca3184a3 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 25 Oct 2024 07:38:01 +1100 Subject: [PATCH] Updated vkd3d to 5eff8bf9188c401cc31ce14d42798dc3751377bd. diff --git a/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch b/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch index a6b8a6da..d0940543 100644 --- a/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch +++ b/patches/vkd3d-latest/0005-Updated-vkd3d-to-ad2208b726f825305f69d099790208e4e4f.patch @@ -1,4 +1,4 @@ -From 96e8ac892cb6820820058ef20c96d49a28570b0e Mon Sep 17 00:00:00 2001 +From c0efe70386c12a81e226ac9a1a8cf1b3f3ee3840 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 30 Oct 2024 10:33:09 +1100 Subject: [PATCH] Updated vkd3d to ad2208b726f825305f69d099790208e4e4f85e35. diff --git a/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch b/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch new file mode 100644 index 00000000..d5a1b584 --- /dev/null +++ b/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch @@ -0,0 +1,1338 @@ +From 6216860a91bbf1faffc1d527bf654f2ced7e7cfe Mon Sep 17 00:00:00 2001 +From: Alistair Leslie-Hughes +Date: Tue, 5 Nov 2024 07:37:21 +1100 +Subject: [PATCH] Updated vkd3d to 794f4c30f4873841aaa0c5c9745ee732437e70db. + +--- + libs/vkd3d/libs/vkd3d-shader/fx.c | 2 +- + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 265 +++++++++--- + libs/vkd3d/libs/vkd3d-shader/ir.c | 397 +++++++++++++++--- + libs/vkd3d/libs/vkd3d-shader/tpf.c | 92 +++- + .../libs/vkd3d-shader/vkd3d_shader_main.c | 3 + + .../libs/vkd3d-shader/vkd3d_shader_private.h | 5 + + 6 files changed, 637 insertions(+), 127 deletions(-) + +diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c +index 8954feb22b7..5382dd94f98 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/fx.c ++++ b/libs/vkd3d/libs/vkd3d-shader/fx.c +@@ -3295,7 +3295,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int + + parse_fx_print_indent(parser); + vkd3d_string_buffer_printf(&parser->buffer, "}"); +- if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER && gs_so.sodecl) ++ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl) + { + vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */", + fx_4_get_string(parser, gs_so.sodecl)); +diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +index ce431ee6815..213e403dcbd 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c ++++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +@@ -6579,7 +6579,43 @@ static void generate_vsir_signature(struct hlsl_ctx *ctx, + } + } + +-static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) ++static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type) ++{ ++ if (hlsl_version_lt(ctx, 4, 0)) ++ return VKD3D_DATA_FLOAT; ++ ++ if (type->class == HLSL_CLASS_ARRAY) ++ return vsir_data_type_from_hlsl_type(ctx, type->e.array.type); ++ if (type->class == HLSL_CLASS_STRUCT) ++ return VKD3D_DATA_MIXED; ++ if (type->class <= HLSL_CLASS_LAST_NUMERIC) ++ { ++ switch (type->e.numeric.type) ++ { ++ case HLSL_TYPE_DOUBLE: ++ return VKD3D_DATA_DOUBLE; ++ case HLSL_TYPE_FLOAT: ++ return VKD3D_DATA_FLOAT; ++ case HLSL_TYPE_HALF: ++ return VKD3D_DATA_HALF; ++ case HLSL_TYPE_INT: ++ return VKD3D_DATA_INT; ++ case HLSL_TYPE_UINT: ++ case HLSL_TYPE_BOOL: ++ return VKD3D_DATA_UINT; ++ } ++ } ++ ++ vkd3d_unreachable(); ++} ++ ++static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx, ++ const struct hlsl_ir_node *instr) ++{ ++ return vsir_data_type_from_hlsl_type(ctx, instr->data_type); ++} ++ ++static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask) + { + uint32_t swizzle; + +@@ -6737,6 +6773,50 @@ static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction( + return ins; + } + ++static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src, ++ struct hlsl_ctx *ctx, const struct hlsl_constant_value *value, ++ enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask) ++{ ++ unsigned int i, j; ++ ++ vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0); ++ if (width == 1) ++ { ++ src->reg.u.immconst_u32[0] = value->u[0].u; ++ return; ++ } ++ ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ for (i = 0, j = 0; i < 4; ++i) ++ { ++ if ((map_writemask & (1u << i)) && (j < width)) ++ src->reg.u.immconst_u32[i] = value->u[j++].u; ++ else ++ src->reg.u.immconst_u32[i] = 0; ++ } ++} ++ ++static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src, ++ struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask) ++{ ++ struct hlsl_ir_constant *constant; ++ ++ if (hlsl_version_ge(ctx, 4, 0) && instr->type == HLSL_IR_CONSTANT) ++ { ++ /* In SM4 constants are inlined */ ++ constant = hlsl_ir_constant(instr); ++ vsir_src_from_hlsl_constant_value(src, ctx, &constant->value, ++ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask); ++ } ++ else ++ { ++ vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); ++ src->reg.idx[0].offset = instr->reg.id; ++ src->reg.dimension = VSIR_DIMENSION_VEC4; ++ src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask); ++ } ++} ++ + static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_constant *constant) + { +@@ -6754,7 +6834,7 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = constant->reg.id; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); ++ src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +@@ -6763,9 +6843,9 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, + } + + /* Translate ops that can be mapped to a single vsir instruction with only one dst register. */ +-static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program, +- struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod, +- bool map_src_swizzles) ++static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, ++ uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles) + { + struct hlsl_ir_node *instr = &expr->node; + struct vkd3d_shader_dst_param *dst_param; +@@ -6786,8 +6866,9 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s + return; + + dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + dst_param->reg.idx[0].offset = instr->reg.id; ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + dst_param->write_mask = instr->reg.writemask; + dst_param->modifiers = dst_mod; + +@@ -6796,9 +6877,7 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s + struct hlsl_ir_node *operand = expr->operands[i].node; + + src_param = &ins->src[i]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = operand->reg.id; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, ++ vsir_src_from_hlsl_node(src_param, ctx, operand, + map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = src_mod; + } +@@ -6820,7 +6899,7 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(operand); + +- src_swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); ++ src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask); + for (i = 0; i < 4; ++i) + { + if (instr->reg.writemask & (1u << i)) +@@ -6866,7 +6945,7 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi + src_param = &ins->src[0]; + vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = operand->reg.id; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); + + if (ctx->profile->major_version < 3) + { +@@ -6908,13 +6987,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + /* Integrals are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_DOUBLE: + if (ctx->double_as_float_alias) + { +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + } + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +@@ -6939,7 +7018,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + + case HLSL_TYPE_BOOL: +@@ -6961,7 +7040,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx, + case HLSL_TYPE_FLOAT: + if (ctx->double_as_float_alias) + { +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + return true; + } + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, +@@ -7001,7 +7080,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + switch (expr->op) + { + case HLSL_OP1_ABS: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true); + break; + + case HLSL_OP1_CAST: +@@ -7013,11 +7092,11 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + break; + + case HLSL_OP1_DSX: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true); + break; + + case HLSL_OP1_DSY: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true); + break; + + case HLSL_OP1_EXP2: +@@ -7029,7 +7108,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + break; + + case HLSL_OP1_NEG: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true); + break; + + case HLSL_OP1_RCP: +@@ -7037,7 +7116,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + break; + + case HLSL_OP1_REINTERPRET: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true); + break; + + case HLSL_OP1_RSQ: +@@ -7045,7 +7124,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + break; + + case HLSL_OP1_SAT: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true); + break; + + case HLSL_OP1_SIN_REDUCED: +@@ -7054,18 +7133,18 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + break; + + case HLSL_OP2_ADD: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true); + break; + + case HLSL_OP2_DOT: + switch (expr->operands[0].node->data_type->dimx) + { + case 3: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false); + break; + + case 4: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false); + break; + + default: +@@ -7075,43 +7154,43 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr + break; + + case HLSL_OP2_MAX: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_MIN: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_MUL: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true); + break; + + case HLSL_OP1_FRACT: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_AND: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true); + break; + + case HLSL_OP2_LOGIC_OR: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true); + break; + + case HLSL_OP2_SLT: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true); + break; + + case HLSL_OP3_CMP: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true); + break; + + case HLSL_OP3_DP2ADD: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false); + break; + + case HLSL_OP3_MAD: +- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true); + break; + + default: +@@ -7227,7 +7306,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, + + vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); + src_param->reg.idx[0].offset = register_index; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(writemask, dst_writemask); ++ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + + if (deref->rel_offset.node) + hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); +@@ -7306,9 +7385,7 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + dst_param->write_mask = instr->reg.writemask; + + src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = coords->reg.id; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); + + sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, + VKD3DSP_WRITEMASK_ALL, &ins->location); +@@ -7316,19 +7393,15 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, + if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) + { + src_param = &ins->src[2]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = ddx->reg.id; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL); + + src_param = &ins->src[3]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = ddy->reg.id; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL); ++ vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL); + } + } + +-static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, +- struct hlsl_ir_swizzle *swizzle_instr) ++static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) + { + struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node; + struct vkd3d_shader_dst_param *dst_param; +@@ -7342,8 +7415,9 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr + return; + + dst_param = &ins->dst[0]; +- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); + dst_param->reg.idx[0].offset = instr->reg.id; ++ dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + dst_param->write_mask = instr->reg.writemask; + + swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); +@@ -7352,8 +7426,10 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr + swizzle = vsir_swizzle_from_hlsl(swizzle); + + src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); ++ VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT); ++ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1); + src_param->reg.idx[0].offset = val->reg.id; ++ src_param->reg.dimension = VSIR_DIMENSION_VEC4; + src_param->swizzle = swizzle; + } + +@@ -7371,9 +7447,7 @@ static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_prog + sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask); + + src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = rhs->reg.id; +- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(rhs->reg.writemask, ins->dst[0].write_mask); ++ vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask); + } + + static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, +@@ -7408,7 +7482,6 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program + struct vkd3d_shader_src_param *src_param; + struct hlsl_ir_node *instr = &iff->node; + struct vkd3d_shader_instruction *ins; +- uint32_t swizzle; + + if (hlsl_version_lt(ctx, 2, 1)) + { +@@ -7421,19 +7494,12 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program + return; + ins->flags = VKD3D_SHADER_REL_OP_NE; + +- swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask); +- swizzle = vsir_swizzle_from_hlsl(swizzle); +- + src_param = &ins->src[0]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = condition->reg.id; +- src_param->swizzle = swizzle; ++ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = 0; + + src_param = &ins->src[1]; +- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); +- src_param->reg.idx[0].offset = condition->reg.id; +- src_param->swizzle = swizzle; ++ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL); + src_param->modifiers = VKD3DSPSM_NEG; + + sm1_generate_vsir_block(ctx, &iff->then_block, program); +@@ -7496,7 +7562,7 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo + break; + + case HLSL_IR_SWIZZLE: +- sm1_generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); ++ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); + break; + + default: +@@ -7557,6 +7623,25 @@ static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_progr + hlsl_block_add_instr(block, vsir_instr); + } + ++static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_node *instr) ++{ ++ struct vkd3d_shader_location *loc; ++ struct hlsl_ir_node *vsir_instr; ++ ++ loc = &program->instructions.elements[program->instructions.count - 1].location; ++ ++ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, ++ program->instructions.count - 1, instr->data_type, &instr->reg, loc))) ++ { ++ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; ++ return; ++ } ++ ++ list_add_before(&instr->entry, &vsir_instr->entry); ++ hlsl_replace_node(instr, vsir_instr); ++} ++ + static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program, + uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc) + { +@@ -7589,6 +7674,60 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx, + add_last_vsir_instr_to_block(ctx, program, block); + } + ++static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, ++ struct vsir_program *program, struct hlsl_ir_expr *expr) ++{ ++ switch (expr->op) ++ { ++ case HLSL_OP1_ABS: ++ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true); ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program) ++{ ++ struct hlsl_ir_node *instr, *next; ++ ++ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry) ++ { ++ if (instr->data_type) ++ { ++ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR) ++ { ++ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class); ++ break; ++ } ++ } ++ ++ switch (instr->type) ++ { ++ case HLSL_IR_CALL: ++ vkd3d_unreachable(); ++ ++ case HLSL_IR_CONSTANT: ++ /* In SM4 all constants are inlined. */ ++ break; ++ ++ case HLSL_IR_EXPR: ++ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr))) ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ break; ++ ++ case HLSL_IR_SWIZZLE: ++ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr)); ++ replace_instr_with_last_vsir_instr(ctx, program, instr); ++ break; ++ ++ default: ++ break; ++ } ++ } ++} ++ + static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program) + { +@@ -7631,6 +7770,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx, + list_move_head(&func->body.instrs, &block.instrs); + + hlsl_block_cleanup(&block); ++ ++ sm4_generate_vsir_block(ctx, &func->body, program); + } + + /* OBJECTIVE: Translate all the information from ctx and entry_func to the +diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c +index 19dc36d9191..9b50a308e11 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/ir.c ++++ b/libs/vkd3d/libs/vkd3d-shader/ir.c +@@ -129,6 +129,38 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( + return NULL; + } + ++static struct signature_element *vsir_signature_find_element_by_name( ++ const struct shader_signature *signature, const char *semantic_name, unsigned int semantic_index) ++{ ++ for (unsigned int i = 0; i < signature->element_count; ++i) ++ { ++ if (!ascii_strcasecmp(signature->elements[i].semantic_name, semantic_name) ++ && signature->elements[i].semantic_index == semantic_index) ++ return &signature->elements[i]; ++ } ++ ++ return NULL; ++} ++ ++static bool vsir_signature_find_sysval(const struct shader_signature *signature, ++ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index) ++{ ++ const struct signature_element *e; ++ unsigned int i; ++ ++ for (i = 0; i < signature->element_count; ++i) ++ { ++ e = &signature->elements[i]; ++ if (e->sysval_semantic == sysval && e->semantic_index == semantic_index) ++ { ++ *element_index = i; ++ return true; ++ } ++ } ++ ++ return false; ++} ++ + void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type, + enum vkd3d_data_type data_type, unsigned int idx_count) + { +@@ -277,6 +309,15 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne + dst->reg.idx[0].offset = idx; + } + ++static void dst_param_init_output(struct vkd3d_shader_dst_param *dst, ++ enum vkd3d_data_type data_type, uint32_t idx, uint32_t write_mask) ++{ ++ vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, data_type, 1); ++ dst->reg.idx[0].offset = idx; ++ dst->reg.dimension = VSIR_DIMENSION_VEC4; ++ dst->write_mask = write_mask; ++} ++ + void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, + enum vkd3d_shader_opcode opcode) + { +@@ -789,6 +830,98 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i + shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id); + } + ++/* Ensure that the program closes with a ret. sm1 programs do not, by default. ++ * Many of our IR passes rely on this in order to insert instructions at the ++ * end of execution. */ ++static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ static const struct vkd3d_shader_location no_loc; ++ if (program->instructions.count ++ && program->instructions.elements[program->instructions.count - 1].opcode == VKD3DSIH_RET) ++ return VKD3D_OK; ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, program->instructions.count, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ vsir_instruction_init(&program->instructions.elements[program->instructions.count - 1], &no_loc, VKD3DSIH_RET); ++ return VKD3D_OK; ++} ++ ++static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ struct shader_signature *signature = &program->output_signature; ++ struct signature_element *new_elements, *e; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX) ++ return VKD3D_OK; ++ ++ if ((e = vsir_signature_find_element_by_name(signature, "COLOR", 0))) ++ { ++ program->diffuse_written_mask = e->mask; ++ e->mask = VKD3DSP_WRITEMASK_ALL; ++ ++ return VKD3D_OK; ++ } ++ ++ if (!(new_elements = vkd3d_realloc(signature->elements, ++ (signature->element_count + 1) * sizeof(*signature->elements)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ signature->elements = new_elements; ++ e = &signature->elements[signature->element_count++]; ++ memset(e, 0, sizeof(*e)); ++ e->semantic_name = vkd3d_strdup("COLOR"); ++ e->sysval_semantic = VKD3D_SHADER_SV_NONE; ++ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ e->register_count = 1; ++ e->mask = VKD3DSP_WRITEMASK_ALL; ++ e->used_mask = VKD3DSP_WRITEMASK_ALL; ++ e->register_index = SM1_COLOR_REGISTER_OFFSET; ++ e->target_location = SM1_COLOR_REGISTER_OFFSET; ++ e->interpolation_mode = VKD3DSIM_NONE; ++ ++ return VKD3D_OK; ++} ++ ++/* Uninitialized components of diffuse yield 1.0 in SM1-2. Implement this by ++ * always writing diffuse in those versions, even if the PS doesn't read it. */ ++static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *program, ++ struct vsir_transformation_context *ctx) ++{ ++ static const struct vkd3d_shader_location no_loc; ++ struct vkd3d_shader_instruction *ins; ++ unsigned int i; ++ ++ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX ++ || program->diffuse_written_mask == VKD3DSP_WRITEMASK_ALL) ++ return VKD3D_OK; ++ ++ /* Write the instruction after all LABEL, DCL, and NOP instructions. ++ * We need to skip NOP instructions because they might result from removed ++ * DCLs, and there could still be DCLs after NOPs. */ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ ins = &program->instructions.elements[i]; ++ ++ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP) ++ break; ++ } ++ ++ if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ins = &program->instructions.elements[i]; ++ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); ++ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); ++ ins->dst[0].reg.idx[0].offset = 0; ++ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask; ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ for (i = 0; i < 4; ++i) ++ ins->src[0].reg.u.immconst_f32[i] = 1.0f; ++ return VKD3D_OK; ++} ++ + static const struct vkd3d_shader_varying_map *find_varying_map( + const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx) + { +@@ -803,14 +936,88 @@ static const struct vkd3d_shader_varying_map *find_varying_map( + return NULL; + } + ++static bool target_allows_subset_masks(const struct vkd3d_shader_compile_info *info) ++{ ++ const struct vkd3d_shader_spirv_target_info *spirv_info; ++ enum vkd3d_shader_spirv_environment environment; ++ ++ switch (info->target_type) ++ { ++ case VKD3D_SHADER_TARGET_SPIRV_BINARY: ++ spirv_info = vkd3d_find_struct(info->next, SPIRV_TARGET_INFO); ++ environment = spirv_info ? spirv_info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0; ++ ++ switch (environment) ++ { ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5: ++ return true; ++ ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0: ++ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1: ++ /* FIXME: Allow KHR_maintenance4. */ ++ return false; ++ ++ default: ++ FIXME("Unrecognized environment %#x.\n", environment); ++ return false; ++ } ++ ++ default: ++ return true; ++ } ++} ++ ++static void remove_unread_output_components(const struct shader_signature *signature, ++ struct vkd3d_shader_instruction *ins, struct vkd3d_shader_dst_param *dst) ++{ ++ const struct signature_element *e; ++ ++ switch (dst->reg.type) ++ { ++ case VKD3DSPR_OUTPUT: ++ e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0); ++ break; ++ ++ case VKD3DSPR_ATTROUT: ++ e = vsir_signature_find_element_for_reg(signature, ++ SM1_COLOR_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); ++ break; ++ ++ case VKD3DSPR_RASTOUT: ++ e = vsir_signature_find_element_for_reg(signature, ++ SM1_RASTOUT_REGISTER_OFFSET + dst->reg.idx[0].offset, 0); ++ break; ++ ++ default: ++ return; ++ } ++ ++ /* We already changed the mask earlier. */ ++ dst->write_mask &= e->mask; ++ ++ if (!dst->write_mask) ++ { ++ if (ins->dst_count == 1) ++ vkd3d_shader_instruction_make_nop(ins); ++ else ++ vsir_dst_param_init(dst, VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); ++ } ++} ++ + static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, + struct vsir_transformation_context *ctx) + { + const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name}; + struct vkd3d_shader_message_context *message_context = ctx->message_context; + const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info; ++ bool allows_subset_masks = target_allows_subset_masks(compile_info); + struct shader_signature *signature = &program->output_signature; ++ unsigned int orig_element_count = signature->element_count; + const struct vkd3d_shader_varying_map_info *varying_map; ++ struct signature_element *new_elements, *e; ++ unsigned int uninit_varying_count = 0; ++ unsigned int subset_varying_count = 0; ++ unsigned int new_register_count = 0; + unsigned int i; + + if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO))) +@@ -819,22 +1026,29 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + for (i = 0; i < signature->element_count; ++i) + { + const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i); +- struct signature_element *e = &signature->elements[i]; + ++ e = &signature->elements[i]; + if (map) + { + unsigned int input_mask = map->input_mask; + + e->target_location = map->input_register_index; + +- /* It is illegal in Vulkan if the next shader uses the same varying +- * location with a different mask. */ +- if (input_mask && input_mask != e->mask) ++ if ((input_mask & e->mask) == input_mask) ++ { ++ ++subset_varying_count; ++ if (!allows_subset_masks) ++ { ++ e->mask = input_mask; ++ e->used_mask &= input_mask; ++ } ++ } ++ else if (input_mask && input_mask != e->mask) + { + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Aborting due to not yet implemented feature: " +- "Output mask %#x does not match input mask %#x.", +- e->mask, input_mask); ++ "Input mask %#x reads components not written in output mask %#x.", ++ input_mask, e->mask); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + } +@@ -842,17 +1056,103 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program + { + e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED; + } ++ ++ new_register_count = max(new_register_count, e->register_index + 1); + } + ++ /* Handle uninitialized varyings by writing them before every ret. ++ * ++ * As far as sm1-sm3 is concerned, drivers disagree on what uninitialized ++ * varyings contain. ++ * ++ * - Diffuse (COLOR0) reliably contains (1, 1, 1, 1) in SM1/2. ++ * In SM3 it may contain (0, 0, 0, 0), (0, 0, 0, 1), or (1, 1, 1, 1). ++ * ++ * - Specular (COLOR1) contains (0, 0, 0, 0) or (0, 0, 0, 1). ++ * WARP writes (1, 1, 1, 1). ++ * ++ * - Anything else contains (0, 0, 0, 0) or (0, 0, 0, 1). ++ * ++ * We don't have enough knowledge to identify diffuse here. Instead we deal ++ * with that in vsir_program_ensure_diffuse(), by always writing diffuse if ++ * the shader doesn't. ++ */ ++ + for (i = 0; i < varying_map->varying_count; ++i) + { + if (varying_map->varying_map[i].output_signature_index >= signature->element_count) ++ ++uninit_varying_count; ++ } ++ ++ if (!(new_elements = vkd3d_realloc(signature->elements, ++ (signature->element_count + uninit_varying_count) * sizeof(*signature->elements)))) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ signature->elements = new_elements; ++ ++ for (i = 0; i < varying_map->varying_count; ++i) ++ { ++ const struct vkd3d_shader_varying_map *map = &varying_map->varying_map[i]; ++ ++ if (map->output_signature_index < orig_element_count) ++ continue; ++ ++ TRACE("Synthesizing zero value for uninitialized output %u (mask %u).\n", ++ map->input_register_index, map->input_mask); ++ e = &signature->elements[signature->element_count++]; ++ memset(e, 0, sizeof(*e)); ++ e->sysval_semantic = VKD3D_SHADER_SV_NONE; ++ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT; ++ e->register_count = 1; ++ e->mask = map->input_mask; ++ e->used_mask = map->input_mask; ++ e->register_index = new_register_count++; ++ e->target_location = map->input_register_index; ++ e->interpolation_mode = VKD3DSIM_LINEAR; ++ } ++ ++ /* Write each uninitialized varying before each ret. */ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ struct vkd3d_shader_location loc; ++ ++ if (ins->opcode != VKD3DSIH_RET) ++ continue; ++ ++ loc = ins->location; ++ if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) ++ return VKD3D_ERROR_OUT_OF_MEMORY; ++ ++ ins = &program->instructions.elements[i]; ++ ++ for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) + { +- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, +- "Aborting due to not yet implemented feature: " +- "The next stage consumes varyings not written by this stage."); +- return VKD3D_ERROR_NOT_IMPLEMENTED; ++ e = &signature->elements[j]; ++ ++ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); ++ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, e->register_index, e->mask); ++ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0); ++ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; ++ ++ins; + } ++ ++ i += uninit_varying_count; ++ } ++ ++ /* Vulkan (without KHR_maintenance4) disallows any mismatching masks, ++ * including when the input mask is a proper subset of the output mask. ++ * Resolve this by rewriting the shader to remove unread components from ++ * any writes to the output variable. */ ++ ++ if (!subset_varying_count || allows_subset_masks) ++ return VKD3D_OK; ++ ++ for (i = 0; i < program->instructions.count; ++i) ++ { ++ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; ++ ++ for (unsigned int j = 0; j < ins->dst_count; ++j) ++ remove_unread_output_components(signature, ins, &ins->dst[j]); + } + + return VKD3D_OK; +@@ -2490,15 +2790,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla + static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, + struct vkd3d_shader_message_context *message_context) + { +- bool main_block_open, is_hull_shader, after_declarations_section; + struct vkd3d_shader_instruction_array *instructions; + struct vsir_program *program = flattener->program; ++ bool is_hull_shader, after_declarations_section; + struct vkd3d_shader_instruction *dst_ins; + size_t i; + + instructions = &program->instructions; + is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL; +- main_block_open = !is_hull_shader; + after_declarations_section = is_hull_shader; + + if (!cf_flattener_require_space(flattener, instructions->count + 1)) +@@ -2822,8 +3121,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + + if (cf_info) + cf_info->inside_block = false; +- else +- main_block_open = false; + break; + + default: +@@ -2833,14 +3130,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte + } + } + +- if (main_block_open) +- { +- if (!(dst_ins = cf_flattener_require_space(flattener, 1))) +- return VKD3D_ERROR_OUT_OF_MEMORY; +- vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET); +- ++flattener->instruction_count; +- } +- + return flattener->status; + } + +@@ -5564,21 +5853,6 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru + return VKD3D_OK; + } + +-static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) +-{ +- for (unsigned int i = 0; i < signature->element_count; ++i) +- { +- if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET +- && !signature->elements[i].register_index) +- { +- *index = i; +- return true; +- } +- } +- +- return false; +-} +- + static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, + const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, +@@ -5690,7 +5964,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK; + +- if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) ++ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx) + || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) + return VKD3D_OK; + +@@ -5808,21 +6082,6 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog + return VKD3D_OK; + } + +-static bool find_sysval_signature_idx(const struct shader_signature *signature, +- enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx) +-{ +- for (unsigned int i = 0; i < signature->element_count; ++i) +- { +- if (signature->elements[i].sysval_semantic == sysval) +- { +- *idx = i; +- return true; +- } +- } +- +- return false; +-} +- + static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program, + struct vsir_transformation_context *ctx) + { +@@ -5878,7 +6137,7 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr + } + } + +- if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx)) ++ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx)) + { + vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC, + "Shader does not write position."); +@@ -7988,6 +8247,31 @@ static void vsir_transform_( + } + } + ++/* Transformations which should happen at parse time, i.e. before scan ++ * information is returned to the user. ++ * ++ * In particular, some passes need to modify the signature, and ++ * vkd3d_shader_scan() should report the modified signature for the given ++ * target. */ ++enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) ++{ ++ struct vsir_transformation_context ctx = ++ { ++ .result = VKD3D_OK, ++ .program = program, ++ .config_flags = config_flags, ++ .compile_info = compile_info, ++ .message_context = message_context, ++ }; ++ ++ /* For vsir_program_ensure_diffuse(). */ ++ if (program->shader_version.major <= 2) ++ vsir_transform(&ctx, vsir_program_add_diffuse_output); ++ ++ return ctx.result; ++} ++ + enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) + { +@@ -8012,6 +8296,11 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t + } + else + { ++ vsir_transform(&ctx, vsir_program_ensure_ret); ++ ++ if (program->shader_version.major <= 2) ++ vsir_transform(&ctx, vsir_program_ensure_diffuse); ++ + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + vsir_transform(&ctx, vsir_program_remap_output_signature); + +diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c +index befe5eacf9c..bbd2f761d29 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c ++++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c +@@ -674,6 +674,7 @@ struct sm4_index_range_array + struct vkd3d_sm4_lookup_tables + { + const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT]; ++ const struct vkd3d_sm4_opcode_info *opcode_info_from_vsir[VKD3DSIH_COUNT]; + const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT]; + const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT]; + const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT]; +@@ -1412,6 +1413,8 @@ struct tpf_compiler + struct vkd3d_sm4_lookup_tables lookup; + struct sm4_stat *stat; + ++ int result; ++ + struct vkd3d_bytecode_buffer *buffer; + struct dxbc_writer dxbc; + }; +@@ -1903,6 +1906,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup) + const struct vkd3d_sm4_opcode_info *info = &opcode_table[i]; + + lookup->opcode_info_from_sm4[info->opcode] = info; ++ lookup->opcode_info_from_vsir[info->handler_idx] = info; + } + + for (i = 0; i < ARRAY_SIZE(register_type_table); ++i) +@@ -1929,6 +1933,24 @@ static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode( + return lookup->opcode_info_from_sm4[sm4_opcode]; + } + ++static const struct vkd3d_sm4_opcode_info *get_info_from_vsir_opcode( ++ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_opcode vsir_opcode) ++{ ++ if (vsir_opcode >= VKD3DSIH_COUNT) ++ return NULL; ++ return lookup->opcode_info_from_vsir[vsir_opcode]; ++} ++ ++static unsigned int opcode_info_get_dst_count(const struct vkd3d_sm4_opcode_info *info) ++{ ++ return strnlen(info->dst_info, SM4_MAX_DST_COUNT); ++} ++ ++static unsigned int opcode_info_get_src_count(const struct vkd3d_sm4_opcode_info *info) ++{ ++ return strnlen(info->src_info, SM4_MAX_SRC_COUNT); ++} ++ + static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type( + const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type) + { +@@ -2651,8 +2673,8 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str + ins->raw = false; + ins->structured = false; + ins->predicate = NULL; +- ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT); +- ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT); ++ ins->dst_count = opcode_info_get_dst_count(opcode_info); ++ ins->src_count = opcode_info_get_src_count(opcode_info); + ins->src = src_params = vsir_program_get_src_params(program, ins->src_count); + if (!src_params && ins->src_count) + { +@@ -2971,7 +2993,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con + return VKD3D_OK; + } + +-static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block); ++static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block); + + static bool type_is_integer(const struct hlsl_type *type) + { +@@ -6094,11 +6116,12 @@ static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_ + hlsl_release_string_buffer(tpf->ctx, dst_type_string); + } + +-static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) ++static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff) + { + struct sm4_instruction instr = + { +- .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ, ++ .opcode = VKD3D_SM4_OP_IF, ++ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ, + .src_count = 1, + }; + +@@ -6210,7 +6233,7 @@ static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_ + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) ++static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop) + { + struct sm4_instruction instr = + { +@@ -6394,7 +6417,7 @@ static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir + write_sm4_instruction(tpf, &instr); + } + +-static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) ++static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s) + { + const struct hlsl_ir_node *selector = s->selector.node; + struct hlsl_ir_switch_case *c; +@@ -6455,7 +6478,46 @@ static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_ + write_sm4_instruction(tpf, &instr); + } + +-static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) ++static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) ++{ ++ const struct vkd3d_sm4_opcode_info *info; ++ struct sm4_instruction instr = {0}; ++ unsigned int dst_count, src_count; ++ ++ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode); ++ VKD3D_ASSERT(info); ++ ++ dst_count = opcode_info_get_dst_count(info); ++ src_count = opcode_info_get_src_count(info); ++ ++ if (ins->dst_count != dst_count) ++ { ++ ERR("Invalid destination count %u for vsir instruction %#x (expected %u).\n", ++ ins->dst_count, ins->opcode, dst_count); ++ tpf->result = VKD3D_ERROR_INVALID_SHADER; ++ return; ++ } ++ if (ins->src_count != src_count) ++ { ++ ERR("Invalid source count %u for vsir instruction %#x (expected %u).\n", ++ ins->src_count, ins->opcode, src_count); ++ tpf->result = VKD3D_ERROR_INVALID_SHADER; ++ return; ++ } ++ ++ instr.opcode = info->opcode; ++ instr.dst_count = ins->dst_count; ++ instr.src_count = ins->src_count; ++ ++ for (unsigned int i = 0; i < ins->dst_count; ++i) ++ instr.dsts[i] = ins->dst[i]; ++ for (unsigned int i = 0; i < ins->src_count; ++i) ++ instr.srcs[i] = ins->src[i]; ++ ++ write_sm4_instruction(tpf, &instr); ++} ++ ++static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins) + { + switch (ins->opcode) + { +@@ -6467,13 +6529,17 @@ static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct + tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp); + break; + ++ case VKD3DSIH_MOV: ++ tpf_simple_instruction(tpf, ins); ++ break; ++ + default: + vkd3d_unreachable(); + break; + } + } + +-static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block) ++static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block) + { + const struct hlsl_ir_node *instr; + unsigned int vsir_instr_idx; +@@ -6765,7 +6831,13 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags, + tpf_write_sfi0(&tpf); + tpf_write_stat(&tpf); + +- if (!(ret = ctx->result)) ++ ret = VKD3D_OK; ++ if (ctx->result) ++ ret = ctx->result; ++ if (tpf.result) ++ ret = tpf.result; ++ ++ if (!ret) + ret = dxbc_writer_write(&tpf.dxbc, out); + for (i = 0; i < tpf.dxbc.section_count; ++i) + vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data); +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +index 3355e18b88e..3afac9a38a4 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +@@ -719,8 +719,11 @@ static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *comp + vsir_program_trace(program); + + vsir_program_cleanup(program); ++ return ret; + } + ++ if (compile_info->target_type != VKD3D_SHADER_TARGET_NONE) ++ ret = vsir_program_transform_early(program, config_flags, compile_info, message_context); + return ret; + } + +diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +index 5ae938e0525..1b6c37343d1 100644 +--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h ++++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +@@ -585,6 +585,8 @@ enum vkd3d_shader_opcode + VKD3DSIH_XOR, + + VKD3DSIH_INVALID, ++ ++ VKD3DSIH_COUNT, + }; + + enum vkd3d_shader_register_type +@@ -1424,6 +1426,7 @@ struct vsir_program + bool use_vocp; + bool has_point_size; + bool has_point_coord; ++ uint8_t diffuse_written_mask; + enum vsir_control_flow_type cf_type; + enum vsir_normalisation_level normalisation_level; + +@@ -1442,6 +1445,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c + enum vsir_normalisation_level normalisation_level); + enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); ++enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags, ++ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); + enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, + const char *source_name, struct vkd3d_shader_message_context *message_context); + struct vkd3d_shader_src_param *vsir_program_create_outpointid_param( +-- +2.45.2 +