wine-staging/patches/vkd3d-latest/0006-Updated-vkd3d-to-794f4c30f4873841aaa0c5c9745ee732437.patch

1339 lines
56 KiB
Diff
Raw Normal View History

2024-11-06 17:25:06 -08:00
From 18c0eb6e1f1ab90c9a0f7c39c56618996e1a982f Mon Sep 17 00:00:00 2001
2024-11-04 16:33:35 -08:00
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Tue, 5 Nov 2024 07:37:21 +1100
Subject: [PATCH] Updated vkd3d to 794f4c30f4873841aaa0c5c9745ee732437e70db.
---
libs/vkd3d/libs/vkd3d-shader/fx.c | 2 +-
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 265 +++++++++---
libs/vkd3d/libs/vkd3d-shader/ir.c | 397 +++++++++++++++---
libs/vkd3d/libs/vkd3d-shader/tpf.c | 92 +++-
.../libs/vkd3d-shader/vkd3d_shader_main.c | 3 +
.../libs/vkd3d-shader/vkd3d_shader_private.h | 5 +
6 files changed, 637 insertions(+), 127 deletions(-)
diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
index 8954feb22b7..5382dd94f98 100644
--- a/libs/vkd3d/libs/vkd3d-shader/fx.c
+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
@@ -3295,7 +3295,7 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int
parse_fx_print_indent(parser);
vkd3d_string_buffer_printf(&parser->buffer, "}");
- if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER && gs_so.sodecl)
+ if (object_type == FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO && gs_so.sodecl)
{
vkd3d_string_buffer_printf(&parser->buffer, "\n/* Stream output declaration: \"%s\" */",
fx_4_get_string(parser, gs_so.sodecl));
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index ce431ee6815..213e403dcbd 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -6579,7 +6579,43 @@ static void generate_vsir_signature(struct hlsl_ctx *ctx,
}
}
-static uint32_t sm1_generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask)
+static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type)
+{
+ if (hlsl_version_lt(ctx, 4, 0))
+ return VKD3D_DATA_FLOAT;
+
+ if (type->class == HLSL_CLASS_ARRAY)
+ return vsir_data_type_from_hlsl_type(ctx, type->e.array.type);
+ if (type->class == HLSL_CLASS_STRUCT)
+ return VKD3D_DATA_MIXED;
+ if (type->class <= HLSL_CLASS_LAST_NUMERIC)
+ {
+ switch (type->e.numeric.type)
+ {
+ case HLSL_TYPE_DOUBLE:
+ return VKD3D_DATA_DOUBLE;
+ case HLSL_TYPE_FLOAT:
+ return VKD3D_DATA_FLOAT;
+ case HLSL_TYPE_HALF:
+ return VKD3D_DATA_HALF;
+ case HLSL_TYPE_INT:
+ return VKD3D_DATA_INT;
+ case HLSL_TYPE_UINT:
+ case HLSL_TYPE_BOOL:
+ return VKD3D_DATA_UINT;
+ }
+ }
+
+ vkd3d_unreachable();
+}
+
+static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx,
+ const struct hlsl_ir_node *instr)
+{
+ return vsir_data_type_from_hlsl_type(ctx, instr->data_type);
+}
+
+static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask)
{
uint32_t swizzle;
@@ -6737,6 +6773,50 @@ static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction(
return ins;
}
+static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src,
+ struct hlsl_ctx *ctx, const struct hlsl_constant_value *value,
+ enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask)
+{
+ unsigned int i, j;
+
+ vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0);
+ if (width == 1)
+ {
+ src->reg.u.immconst_u32[0] = value->u[0].u;
+ return;
+ }
+
+ src->reg.dimension = VSIR_DIMENSION_VEC4;
+ for (i = 0, j = 0; i < 4; ++i)
+ {
+ if ((map_writemask & (1u << i)) && (j < width))
+ src->reg.u.immconst_u32[i] = value->u[j++].u;
+ else
+ src->reg.u.immconst_u32[i] = 0;
+ }
+}
+
+static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
+ struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, uint32_t map_writemask)
+{
+ struct hlsl_ir_constant *constant;
+
+ if (hlsl_version_ge(ctx, 4, 0) && instr->type == HLSL_IR_CONSTANT)
+ {
+ /* In SM4 constants are inlined */
+ constant = hlsl_ir_constant(instr);
+ vsir_src_from_hlsl_constant_value(src, ctx, &constant->value,
+ vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->dimx, map_writemask);
+ }
+ else
+ {
+ vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
+ src->reg.idx[0].offset = instr->reg.id;
+ src->reg.dimension = VSIR_DIMENSION_VEC4;
+ src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask);
+ }
+}
+
static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
struct vsir_program *program, struct hlsl_ir_constant *constant)
{
@@ -6754,7 +6834,7 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
src_param = &ins->src[0];
vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
src_param->reg.idx[0].offset = constant->reg.id;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask);
+ src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask);
dst_param = &ins->dst[0];
vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
@@ -6763,9 +6843,9 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
}
/* Translate ops that can be mapped to a single vsir instruction with only one dst register. */
-static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, struct vsir_program *program,
- struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode, uint32_t src_mod, uint32_t dst_mod,
- bool map_src_swizzles)
+static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode,
+ uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles)
{
struct hlsl_ir_node *instr = &expr->node;
struct vkd3d_shader_dst_param *dst_param;
@@ -6786,8 +6866,9 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s
return;
dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
dst_param->reg.idx[0].offset = instr->reg.id;
+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
dst_param->write_mask = instr->reg.writemask;
dst_param->modifiers = dst_mod;
@@ -6796,9 +6877,7 @@ static void sm1_generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx, s
struct hlsl_ir_node *operand = expr->operands[i].node;
src_param = &ins->src[i];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = operand->reg.id;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask,
+ vsir_src_from_hlsl_node(src_param, ctx, operand,
map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL);
src_param->modifiers = src_mod;
}
@@ -6820,7 +6899,7 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx
VKD3D_ASSERT(instr->reg.allocated);
VKD3D_ASSERT(operand);
- src_swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask);
+ src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask);
for (i = 0; i < 4; ++i)
{
if (instr->reg.writemask & (1u << i))
@@ -6866,7 +6945,7 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi
src_param = &ins->src[0];
vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
src_param->reg.idx[0].offset = operand->reg.id;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+ src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL);
if (ctx->profile->major_version < 3)
{
@@ -6908,13 +6987,13 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
/* Integrals are internally represented as floats, so no change is necessary.*/
case HLSL_TYPE_HALF:
case HLSL_TYPE_FLOAT:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
return true;
case HLSL_TYPE_DOUBLE:
if (ctx->double_as_float_alias)
{
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
return true;
}
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
@@ -6939,7 +7018,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
return true;
case HLSL_TYPE_BOOL:
@@ -6961,7 +7040,7 @@ static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
case HLSL_TYPE_FLOAT:
if (ctx->double_as_float_alias)
{
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
return true;
}
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
@@ -7001,7 +7080,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
switch (expr->op)
{
case HLSL_OP1_ABS:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true);
break;
case HLSL_OP1_CAST:
@@ -7013,11 +7092,11 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
break;
case HLSL_OP1_DSX:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
break;
case HLSL_OP1_DSY:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
break;
case HLSL_OP1_EXP2:
@@ -7029,7 +7108,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
break;
case HLSL_OP1_NEG:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
break;
case HLSL_OP1_RCP:
@@ -7037,7 +7116,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
break;
case HLSL_OP1_REINTERPRET:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
break;
case HLSL_OP1_RSQ:
@@ -7045,7 +7124,7 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
break;
case HLSL_OP1_SAT:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
break;
case HLSL_OP1_SIN_REDUCED:
@@ -7054,18 +7133,18 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
break;
case HLSL_OP2_ADD:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
break;
case HLSL_OP2_DOT:
switch (expr->operands[0].node->data_type->dimx)
{
case 3:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
break;
case 4:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
break;
default:
@@ -7075,43 +7154,43 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
break;
case HLSL_OP2_MAX:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
break;
case HLSL_OP2_MIN:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
break;
case HLSL_OP2_MUL:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true);
break;
case HLSL_OP1_FRACT:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
break;
case HLSL_OP2_LOGIC_AND:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
break;
case HLSL_OP2_LOGIC_OR:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
break;
case HLSL_OP2_SLT:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true);
break;
case HLSL_OP3_CMP:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true);
break;
case HLSL_OP3_DP2ADD:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false);
break;
case HLSL_OP3_MAD:
- sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true);
break;
default:
@@ -7227,7 +7306,7 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1);
src_param->reg.idx[0].offset = register_index;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(writemask, dst_writemask);
+ src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
if (deref->rel_offset.node)
hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir.");
@@ -7306,9 +7385,7 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
dst_param->write_mask = instr->reg.writemask;
src_param = &ins->src[0];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = coords->reg.id;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+ vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL);
sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource,
VKD3DSP_WRITEMASK_ALL, &ins->location);
@@ -7316,19 +7393,15 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
{
src_param = &ins->src[2];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = ddx->reg.id;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+ vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL);
src_param = &ins->src[3];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = ddy->reg.id;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+ vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL);
}
}
-static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program,
- struct hlsl_ir_swizzle *swizzle_instr)
+static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr)
{
struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node;
struct vkd3d_shader_dst_param *dst_param;
@@ -7342,8 +7415,9 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr
return;
dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
dst_param->reg.idx[0].offset = instr->reg.id;
+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
dst_param->write_mask = instr->reg.writemask;
swizzle = hlsl_swizzle_from_writemask(val->reg.writemask);
@@ -7352,8 +7426,10 @@ static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_pr
swizzle = vsir_swizzle_from_hlsl(swizzle);
src_param = &ins->src[0];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT);
+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1);
src_param->reg.idx[0].offset = val->reg.id;
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
src_param->swizzle = swizzle;
}
@@ -7371,9 +7447,7 @@ static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_prog
sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask);
src_param = &ins->src[0];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = rhs->reg.id;
- src_param->swizzle = sm1_generate_vsir_get_src_swizzle(rhs->reg.writemask, ins->dst[0].write_mask);
+ vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask);
}
static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
@@ -7408,7 +7482,6 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program
struct vkd3d_shader_src_param *src_param;
struct hlsl_ir_node *instr = &iff->node;
struct vkd3d_shader_instruction *ins;
- uint32_t swizzle;
if (hlsl_version_lt(ctx, 2, 1))
{
@@ -7421,19 +7494,12 @@ static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program
return;
ins->flags = VKD3D_SHADER_REL_OP_NE;
- swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask);
- swizzle = vsir_swizzle_from_hlsl(swizzle);
-
src_param = &ins->src[0];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = condition->reg.id;
- src_param->swizzle = swizzle;
+ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL);
src_param->modifiers = 0;
src_param = &ins->src[1];
- vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
- src_param->reg.idx[0].offset = condition->reg.id;
- src_param->swizzle = swizzle;
+ vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL);
src_param->modifiers = VKD3DSPSM_NEG;
sm1_generate_vsir_block(ctx, &iff->then_block, program);
@@ -7496,7 +7562,7 @@ static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo
break;
case HLSL_IR_SWIZZLE:
- sm1_generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
break;
default:
@@ -7557,6 +7623,25 @@ static void add_last_vsir_instr_to_block(struct hlsl_ctx *ctx, struct vsir_progr
hlsl_block_add_instr(block, vsir_instr);
}
+static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_node *instr)
+{
+ struct vkd3d_shader_location *loc;
+ struct hlsl_ir_node *vsir_instr;
+
+ loc = &program->instructions.elements[program->instructions.count - 1].location;
+
+ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx,
+ program->instructions.count - 1, instr->data_type, &instr->reg, loc)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ list_add_before(&instr->entry, &vsir_instr->entry);
+ hlsl_replace_node(instr, vsir_instr);
+}
+
static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program,
uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc)
{
@@ -7589,6 +7674,60 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
add_last_vsir_instr_to_block(ctx, program, block);
}
+static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_expr *expr)
+{
+ switch (expr->op)
+ {
+ case HLSL_OP1_ABS:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true);
+ return true;
+
+ default:
+ return false;
+ }
+}
+
+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
+{
+ struct hlsl_ir_node *instr, *next;
+
+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
+ {
+ if (instr->data_type)
+ {
+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
+ {
+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
+ break;
+ }
+ }
+
+ switch (instr->type)
+ {
+ case HLSL_IR_CALL:
+ vkd3d_unreachable();
+
+ case HLSL_IR_CONSTANT:
+ /* In SM4 all constants are inlined. */
+ break;
+
+ case HLSL_IR_EXPR:
+ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr)))
+ replace_instr_with_last_vsir_instr(ctx, program, instr);
+ break;
+
+ case HLSL_IR_SWIZZLE:
+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
+ replace_instr_with_last_vsir_instr(ctx, program, instr);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
{
@@ -7631,6 +7770,8 @@ static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
list_move_head(&func->body.instrs, &block.instrs);
hlsl_block_cleanup(&block);
+
+ sm4_generate_vsir_block(ctx, &func->body, program);
}
/* OBJECTIVE: Translate all the information from ctx and entry_func to the
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index 19dc36d9191..9b50a308e11 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -129,6 +129,38 @@ const struct vkd3d_shader_parameter1 *vsir_program_get_parameter(
return NULL;
}
+static struct signature_element *vsir_signature_find_element_by_name(
+ const struct shader_signature *signature, const char *semantic_name, unsigned int semantic_index)
+{
+ for (unsigned int i = 0; i < signature->element_count; ++i)
+ {
+ if (!ascii_strcasecmp(signature->elements[i].semantic_name, semantic_name)
+ && signature->elements[i].semantic_index == semantic_index)
+ return &signature->elements[i];
+ }
+
+ return NULL;
+}
+
+static bool vsir_signature_find_sysval(const struct shader_signature *signature,
+ enum vkd3d_shader_sysval_semantic sysval, unsigned int semantic_index, unsigned int *element_index)
+{
+ const struct signature_element *e;
+ unsigned int i;
+
+ for (i = 0; i < signature->element_count; ++i)
+ {
+ e = &signature->elements[i];
+ if (e->sysval_semantic == sysval && e->semantic_index == semantic_index)
+ {
+ *element_index = i;
+ return true;
+ }
+ }
+
+ return false;
+}
+
void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type,
enum vkd3d_data_type data_type, unsigned int idx_count)
{
@@ -277,6 +309,15 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne
dst->reg.idx[0].offset = idx;
}
+static void dst_param_init_output(struct vkd3d_shader_dst_param *dst,
+ enum vkd3d_data_type data_type, uint32_t idx, uint32_t write_mask)
+{
+ vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, data_type, 1);
+ dst->reg.idx[0].offset = idx;
+ dst->reg.dimension = VSIR_DIMENSION_VEC4;
+ dst->write_mask = write_mask;
+}
+
void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
enum vkd3d_shader_opcode opcode)
{
@@ -789,6 +830,98 @@ static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_i
shader_register_eliminate_phase_addressing(&ins->dst[i].reg, instance_id);
}
+/* Ensure that the program closes with a ret. sm1 programs do not, by default.
+ * Many of our IR passes rely on this in order to insert instructions at the
+ * end of execution. */
+static enum vkd3d_result vsir_program_ensure_ret(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
+{
+ static const struct vkd3d_shader_location no_loc;
+ if (program->instructions.count
+ && program->instructions.elements[program->instructions.count - 1].opcode == VKD3DSIH_RET)
+ return VKD3D_OK;
+
+ if (!shader_instruction_array_insert_at(&program->instructions, program->instructions.count, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ vsir_instruction_init(&program->instructions.elements[program->instructions.count - 1], &no_loc, VKD3DSIH_RET);
+ return VKD3D_OK;
+}
+
+static enum vkd3d_result vsir_program_add_diffuse_output(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
+{
+ struct shader_signature *signature = &program->output_signature;
+ struct signature_element *new_elements, *e;
+
+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX)
+ return VKD3D_OK;
+
+ if ((e = vsir_signature_find_element_by_name(signature, "COLOR", 0)))
+ {
+ program->diffuse_written_mask = e->mask;
+ e->mask = VKD3DSP_WRITEMASK_ALL;
+
+ return VKD3D_OK;
+ }
+
+ if (!(new_elements = vkd3d_realloc(signature->elements,
+ (signature->element_count + 1) * sizeof(*signature->elements))))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ signature->elements = new_elements;
+ e = &signature->elements[signature->element_count++];
+ memset(e, 0, sizeof(*e));
+ e->semantic_name = vkd3d_strdup("COLOR");
+ e->sysval_semantic = VKD3D_SHADER_SV_NONE;
+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
+ e->register_count = 1;
+ e->mask = VKD3DSP_WRITEMASK_ALL;
+ e->used_mask = VKD3DSP_WRITEMASK_ALL;
+ e->register_index = SM1_COLOR_REGISTER_OFFSET;
+ e->target_location = SM1_COLOR_REGISTER_OFFSET;
+ e->interpolation_mode = VKD3DSIM_NONE;
+
+ return VKD3D_OK;
+}
+
+/* Uninitialized components of diffuse yield 1.0 in SM1-2. Implement this by
+ * always writing diffuse in those versions, even if the PS doesn't read it. */
+static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *program,
+ struct vsir_transformation_context *ctx)
+{
+ static const struct vkd3d_shader_location no_loc;
+ struct vkd3d_shader_instruction *ins;
+ unsigned int i;
+
+ if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX
+ || program->diffuse_written_mask == VKD3DSP_WRITEMASK_ALL)
+ return VKD3D_OK;
+
+ /* Write the instruction after all LABEL, DCL, and NOP instructions.
+ * We need to skip NOP instructions because they might result from removed
+ * DCLs, and there could still be DCLs after NOPs. */
+ for (i = 0; i < program->instructions.count; ++i)
+ {
+ ins = &program->instructions.elements[i];
+
+ if (!vsir_instruction_is_dcl(ins) && ins->opcode != VKD3DSIH_LABEL && ins->opcode != VKD3DSIH_NOP)
+ break;
+ }
+
+ if (!shader_instruction_array_insert_at(&program->instructions, i, 1))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ ins = &program->instructions.elements[i];
+ vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1);
+ vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1);
+ ins->dst[0].reg.idx[0].offset = 0;
+ ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL & ~program->diffuse_written_mask;
+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ for (i = 0; i < 4; ++i)
+ ins->src[0].reg.u.immconst_f32[i] = 1.0f;
+ return VKD3D_OK;
+}
+
static const struct vkd3d_shader_varying_map *find_varying_map(
const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx)
{
@@ -803,14 +936,88 @@ static const struct vkd3d_shader_varying_map *find_varying_map(
return NULL;
}
+static bool target_allows_subset_masks(const struct vkd3d_shader_compile_info *info)
+{
+ const struct vkd3d_shader_spirv_target_info *spirv_info;
+ enum vkd3d_shader_spirv_environment environment;
+
+ switch (info->target_type)
+ {
+ case VKD3D_SHADER_TARGET_SPIRV_BINARY:
+ spirv_info = vkd3d_find_struct(info->next, SPIRV_TARGET_INFO);
+ environment = spirv_info ? spirv_info->environment : VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0;
+
+ switch (environment)
+ {
+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_OPENGL_4_5:
+ return true;
+
+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_0:
+ case VKD3D_SHADER_SPIRV_ENVIRONMENT_VULKAN_1_1:
+ /* FIXME: Allow KHR_maintenance4. */
+ return false;
+
+ default:
+ FIXME("Unrecognized environment %#x.\n", environment);
+ return false;
+ }
+
+ default:
+ return true;
+ }
+}
+
+static void remove_unread_output_components(const struct shader_signature *signature,
+ struct vkd3d_shader_instruction *ins, struct vkd3d_shader_dst_param *dst)
+{
+ const struct signature_element *e;
+
+ switch (dst->reg.type)
+ {
+ case VKD3DSPR_OUTPUT:
+ e = vsir_signature_find_element_for_reg(signature, dst->reg.idx[0].offset, 0);
+ break;
+
+ case VKD3DSPR_ATTROUT:
+ e = vsir_signature_find_element_for_reg(signature,
+ SM1_COLOR_REGISTER_OFFSET + dst->reg.idx[0].offset, 0);
+ break;
+
+ case VKD3DSPR_RASTOUT:
+ e = vsir_signature_find_element_for_reg(signature,
+ SM1_RASTOUT_REGISTER_OFFSET + dst->reg.idx[0].offset, 0);
+ break;
+
+ default:
+ return;
+ }
+
+ /* We already changed the mask earlier. */
+ dst->write_mask &= e->mask;
+
+ if (!dst->write_mask)
+ {
+ if (ins->dst_count == 1)
+ vkd3d_shader_instruction_make_nop(ins);
+ else
+ vsir_dst_param_init(dst, VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0);
+ }
+}
+
static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program,
struct vsir_transformation_context *ctx)
{
const struct vkd3d_shader_location location = {.source_name = ctx->compile_info->source_name};
struct vkd3d_shader_message_context *message_context = ctx->message_context;
const struct vkd3d_shader_compile_info *compile_info = ctx->compile_info;
+ bool allows_subset_masks = target_allows_subset_masks(compile_info);
struct shader_signature *signature = &program->output_signature;
+ unsigned int orig_element_count = signature->element_count;
const struct vkd3d_shader_varying_map_info *varying_map;
+ struct signature_element *new_elements, *e;
+ unsigned int uninit_varying_count = 0;
+ unsigned int subset_varying_count = 0;
+ unsigned int new_register_count = 0;
unsigned int i;
if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO)))
@@ -819,22 +1026,29 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
for (i = 0; i < signature->element_count; ++i)
{
const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i);
- struct signature_element *e = &signature->elements[i];
+ e = &signature->elements[i];
if (map)
{
unsigned int input_mask = map->input_mask;
e->target_location = map->input_register_index;
- /* It is illegal in Vulkan if the next shader uses the same varying
- * location with a different mask. */
- if (input_mask && input_mask != e->mask)
+ if ((input_mask & e->mask) == input_mask)
+ {
+ ++subset_varying_count;
+ if (!allows_subset_masks)
+ {
+ e->mask = input_mask;
+ e->used_mask &= input_mask;
+ }
+ }
+ else if (input_mask && input_mask != e->mask)
{
vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
"Aborting due to not yet implemented feature: "
- "Output mask %#x does not match input mask %#x.",
- e->mask, input_mask);
+ "Input mask %#x reads components not written in output mask %#x.",
+ input_mask, e->mask);
return VKD3D_ERROR_NOT_IMPLEMENTED;
}
}
@@ -842,17 +1056,103 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program
{
e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED;
}
+
+ new_register_count = max(new_register_count, e->register_index + 1);
}
+ /* Handle uninitialized varyings by writing them before every ret.
+ *
+ * As far as sm1-sm3 is concerned, drivers disagree on what uninitialized
+ * varyings contain.
+ *
+ * - Diffuse (COLOR0) reliably contains (1, 1, 1, 1) in SM1/2.
+ * In SM3 it may contain (0, 0, 0, 0), (0, 0, 0, 1), or (1, 1, 1, 1).
+ *
+ * - Specular (COLOR1) contains (0, 0, 0, 0) or (0, 0, 0, 1).
+ * WARP writes (1, 1, 1, 1).
+ *
+ * - Anything else contains (0, 0, 0, 0) or (0, 0, 0, 1).
+ *
+ * We don't have enough knowledge to identify diffuse here. Instead we deal
+ * with that in vsir_program_ensure_diffuse(), by always writing diffuse if
+ * the shader doesn't.
+ */
+
for (i = 0; i < varying_map->varying_count; ++i)
{
if (varying_map->varying_map[i].output_signature_index >= signature->element_count)
+ ++uninit_varying_count;
+ }
+
+ if (!(new_elements = vkd3d_realloc(signature->elements,
+ (signature->element_count + uninit_varying_count) * sizeof(*signature->elements))))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+ signature->elements = new_elements;
+
+ for (i = 0; i < varying_map->varying_count; ++i)
+ {
+ const struct vkd3d_shader_varying_map *map = &varying_map->varying_map[i];
+
+ if (map->output_signature_index < orig_element_count)
+ continue;
+
+ TRACE("Synthesizing zero value for uninitialized output %u (mask %u).\n",
+ map->input_register_index, map->input_mask);
+ e = &signature->elements[signature->element_count++];
+ memset(e, 0, sizeof(*e));
+ e->sysval_semantic = VKD3D_SHADER_SV_NONE;
+ e->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
+ e->register_count = 1;
+ e->mask = map->input_mask;
+ e->used_mask = map->input_mask;
+ e->register_index = new_register_count++;
+ e->target_location = map->input_register_index;
+ e->interpolation_mode = VKD3DSIM_LINEAR;
+ }
+
+ /* Write each uninitialized varying before each ret. */
+ for (i = 0; i < program->instructions.count; ++i)
+ {
+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+ struct vkd3d_shader_location loc;
+
+ if (ins->opcode != VKD3DSIH_RET)
+ continue;
+
+ loc = ins->location;
+ if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count))
+ return VKD3D_ERROR_OUT_OF_MEMORY;
+
+ ins = &program->instructions.elements[i];
+
+ for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j)
{
- vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
- "Aborting due to not yet implemented feature: "
- "The next stage consumes varyings not written by this stage.");
- return VKD3D_ERROR_NOT_IMPLEMENTED;
+ e = &signature->elements[j];
+
+ vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1);
+ dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, e->register_index, e->mask);
+ vsir_src_param_init(&ins->src[0], VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
+ ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
+ ++ins;
}
+
+ i += uninit_varying_count;
+ }
+
+ /* Vulkan (without KHR_maintenance4) disallows any mismatching masks,
+ * including when the input mask is a proper subset of the output mask.
+ * Resolve this by rewriting the shader to remove unread components from
+ * any writes to the output variable. */
+
+ if (!subset_varying_count || allows_subset_masks)
+ return VKD3D_OK;
+
+ for (i = 0; i < program->instructions.count; ++i)
+ {
+ struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
+
+ for (unsigned int j = 0; j < ins->dst_count; ++j)
+ remove_unread_output_components(signature, ins, &ins->dst[j]);
}
return VKD3D_OK;
@@ -2490,15 +2790,14 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla
static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener,
struct vkd3d_shader_message_context *message_context)
{
- bool main_block_open, is_hull_shader, after_declarations_section;
struct vkd3d_shader_instruction_array *instructions;
struct vsir_program *program = flattener->program;
+ bool is_hull_shader, after_declarations_section;
struct vkd3d_shader_instruction *dst_ins;
size_t i;
instructions = &program->instructions;
is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL;
- main_block_open = !is_hull_shader;
after_declarations_section = is_hull_shader;
if (!cf_flattener_require_space(flattener, instructions->count + 1))
@@ -2822,8 +3121,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
if (cf_info)
cf_info->inside_block = false;
- else
- main_block_open = false;
break;
default:
@@ -2833,14 +3130,6 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte
}
}
- if (main_block_open)
- {
- if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
- return VKD3D_ERROR_OUT_OF_MEMORY;
- vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET);
- ++flattener->instruction_count;
- }
-
return flattener->status;
}
@@ -5564,21 +5853,6 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru
return VKD3D_OK;
}
-static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index)
-{
- for (unsigned int i = 0; i < signature->element_count; ++i)
- {
- if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET
- && !signature->elements[i].register_index)
- {
- *index = i;
- return true;
- }
- }
-
- return false;
-}
-
static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program,
const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func,
const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx,
@@ -5690,7 +5964,7 @@ static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *pro
if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
return VKD3D_OK;
- if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx)
+ if (!vsir_signature_find_sysval(&program->output_signature, VKD3D_SHADER_SV_TARGET, 0, &colour_signature_idx)
|| !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3))
return VKD3D_OK;
@@ -5808,21 +6082,6 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog
return VKD3D_OK;
}
-static bool find_sysval_signature_idx(const struct shader_signature *signature,
- enum vkd3d_shader_sysval_semantic sysval, uint32_t *idx)
-{
- for (unsigned int i = 0; i < signature->element_count; ++i)
- {
- if (signature->elements[i].sysval_semantic == sysval)
- {
- *idx = i;
- return true;
- }
- }
-
- return false;
-}
-
static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *program,
struct vsir_transformation_context *ctx)
{
@@ -5878,7 +6137,7 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr
}
}
- if (!find_sysval_signature_idx(signature, VKD3D_SHADER_SV_POSITION, &position_signature_idx))
+ if (!vsir_signature_find_sysval(signature, VKD3D_SHADER_SV_POSITION, 0, &position_signature_idx))
{
vkd3d_shader_error(ctx->message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_MISSING_SEMANTIC,
"Shader does not write position.");
@@ -7988,6 +8247,31 @@ static void vsir_transform_(
}
}
+/* Transformations which should happen at parse time, i.e. before scan
+ * information is returned to the user.
+ *
+ * In particular, some passes need to modify the signature, and
+ * vkd3d_shader_scan() should report the modified signature for the given
+ * target. */
+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags,
+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
+{
+ struct vsir_transformation_context ctx =
+ {
+ .result = VKD3D_OK,
+ .program = program,
+ .config_flags = config_flags,
+ .compile_info = compile_info,
+ .message_context = message_context,
+ };
+
+ /* For vsir_program_ensure_diffuse(). */
+ if (program->shader_version.major <= 2)
+ vsir_transform(&ctx, vsir_program_add_diffuse_output);
+
+ return ctx.result;
+}
+
enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags,
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
{
@@ -8012,6 +8296,11 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t
}
else
{
+ vsir_transform(&ctx, vsir_program_ensure_ret);
+
+ if (program->shader_version.major <= 2)
+ vsir_transform(&ctx, vsir_program_ensure_diffuse);
+
if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
vsir_transform(&ctx, vsir_program_remap_output_signature);
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index befe5eacf9c..bbd2f761d29 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -674,6 +674,7 @@ struct sm4_index_range_array
struct vkd3d_sm4_lookup_tables
{
const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT];
+ const struct vkd3d_sm4_opcode_info *opcode_info_from_vsir[VKD3DSIH_COUNT];
const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT];
const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT];
const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT];
@@ -1412,6 +1413,8 @@ struct tpf_compiler
struct vkd3d_sm4_lookup_tables lookup;
struct sm4_stat *stat;
+ int result;
+
struct vkd3d_bytecode_buffer *buffer;
struct dxbc_writer dxbc;
};
@@ -1903,6 +1906,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
const struct vkd3d_sm4_opcode_info *info = &opcode_table[i];
lookup->opcode_info_from_sm4[info->opcode] = info;
+ lookup->opcode_info_from_vsir[info->handler_idx] = info;
}
for (i = 0; i < ARRAY_SIZE(register_type_table); ++i)
@@ -1929,6 +1933,24 @@ static const struct vkd3d_sm4_opcode_info *get_info_from_sm4_opcode(
return lookup->opcode_info_from_sm4[sm4_opcode];
}
+static const struct vkd3d_sm4_opcode_info *get_info_from_vsir_opcode(
+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_shader_opcode vsir_opcode)
+{
+ if (vsir_opcode >= VKD3DSIH_COUNT)
+ return NULL;
+ return lookup->opcode_info_from_vsir[vsir_opcode];
+}
+
+static unsigned int opcode_info_get_dst_count(const struct vkd3d_sm4_opcode_info *info)
+{
+ return strnlen(info->dst_info, SM4_MAX_DST_COUNT);
+}
+
+static unsigned int opcode_info_get_src_count(const struct vkd3d_sm4_opcode_info *info)
+{
+ return strnlen(info->src_info, SM4_MAX_SRC_COUNT);
+}
+
static const struct vkd3d_sm4_register_type_info *get_info_from_sm4_register_type(
const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_register_type sm4_type)
{
@@ -2651,8 +2673,8 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str
ins->raw = false;
ins->structured = false;
ins->predicate = NULL;
- ins->dst_count = strnlen(opcode_info->dst_info, SM4_MAX_DST_COUNT);
- ins->src_count = strnlen(opcode_info->src_info, SM4_MAX_SRC_COUNT);
+ ins->dst_count = opcode_info_get_dst_count(opcode_info);
+ ins->src_count = opcode_info_get_src_count(opcode_info);
ins->src = src_params = vsir_program_get_src_params(program, ins->src_count);
if (!src_params && ins->src_count)
{
@@ -2971,7 +2993,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con
return VKD3D_OK;
}
-static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block);
+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block);
static bool type_is_integer(const struct hlsl_type *type)
{
@@ -6094,11 +6116,12 @@ static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_
hlsl_release_string_buffer(tpf->ctx, dst_type_string);
}
-static void write_sm4_if(const struct tpf_compiler *tpf, const struct hlsl_ir_if *iff)
+static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff)
{
struct sm4_instruction instr =
{
- .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ,
+ .opcode = VKD3D_SM4_OP_IF,
+ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ,
.src_count = 1,
};
@@ -6210,7 +6233,7 @@ static void write_sm4_load(const struct tpf_compiler *tpf, const struct hlsl_ir_
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_loop(const struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop)
+static void write_sm4_loop(struct tpf_compiler *tpf, const struct hlsl_ir_loop *loop)
{
struct sm4_instruction instr =
{
@@ -6394,7 +6417,7 @@ static void write_sm4_store(const struct tpf_compiler *tpf, const struct hlsl_ir
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_switch(const struct tpf_compiler *tpf, const struct hlsl_ir_switch *s)
+static void write_sm4_switch(struct tpf_compiler *tpf, const struct hlsl_ir_switch *s)
{
const struct hlsl_ir_node *selector = s->selector.node;
struct hlsl_ir_switch_case *c;
@@ -6455,7 +6478,46 @@ static void write_sm4_swizzle(const struct tpf_compiler *tpf, const struct hlsl_
write_sm4_instruction(tpf, &instr);
}
-static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
+static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
+{
+ const struct vkd3d_sm4_opcode_info *info;
+ struct sm4_instruction instr = {0};
+ unsigned int dst_count, src_count;
+
+ info = get_info_from_vsir_opcode(&tpf->lookup, ins->opcode);
+ VKD3D_ASSERT(info);
+
+ dst_count = opcode_info_get_dst_count(info);
+ src_count = opcode_info_get_src_count(info);
+
+ if (ins->dst_count != dst_count)
+ {
+ ERR("Invalid destination count %u for vsir instruction %#x (expected %u).\n",
+ ins->dst_count, ins->opcode, dst_count);
+ tpf->result = VKD3D_ERROR_INVALID_SHADER;
+ return;
+ }
+ if (ins->src_count != src_count)
+ {
+ ERR("Invalid source count %u for vsir instruction %#x (expected %u).\n",
+ ins->src_count, ins->opcode, src_count);
+ tpf->result = VKD3D_ERROR_INVALID_SHADER;
+ return;
+ }
+
+ instr.opcode = info->opcode;
+ instr.dst_count = ins->dst_count;
+ instr.src_count = ins->src_count;
+
+ for (unsigned int i = 0; i < ins->dst_count; ++i)
+ instr.dsts[i] = ins->dst[i];
+ for (unsigned int i = 0; i < ins->src_count; ++i)
+ instr.srcs[i] = ins->src[i];
+
+ write_sm4_instruction(tpf, &instr);
+}
+
+static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_shader_instruction *ins)
{
switch (ins->opcode)
{
@@ -6467,13 +6529,17 @@ static void tpf_handle_instruction(const struct tpf_compiler *tpf, const struct
tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp);
break;
+ case VKD3DSIH_MOV:
+ tpf_simple_instruction(tpf, ins);
+ break;
+
default:
vkd3d_unreachable();
break;
}
}
-static void write_sm4_block(const struct tpf_compiler *tpf, const struct hlsl_block *block)
+static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block)
{
const struct hlsl_ir_node *instr;
unsigned int vsir_instr_idx;
@@ -6765,7 +6831,13 @@ int tpf_compile(struct vsir_program *program, uint64_t config_flags,
tpf_write_sfi0(&tpf);
tpf_write_stat(&tpf);
- if (!(ret = ctx->result))
+ ret = VKD3D_OK;
+ if (ctx->result)
+ ret = ctx->result;
+ if (tpf.result)
+ ret = tpf.result;
+
+ if (!ret)
ret = dxbc_writer_write(&tpf.dxbc, out);
for (i = 0; i < tpf.dxbc.section_count; ++i)
vkd3d_shader_free_shader_code(&tpf.dxbc.sections[i].data);
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
index 3355e18b88e..3afac9a38a4 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
@@ -719,8 +719,11 @@ static enum vkd3d_result vsir_parse(const struct vkd3d_shader_compile_info *comp
vsir_program_trace(program);
vsir_program_cleanup(program);
+ return ret;
}
+ if (compile_info->target_type != VKD3D_SHADER_TARGET_NONE)
+ ret = vsir_program_transform_early(program, config_flags, compile_info, message_context);
return ret;
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index 5ae938e0525..1b6c37343d1 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -585,6 +585,8 @@ enum vkd3d_shader_opcode
VKD3DSIH_XOR,
VKD3DSIH_INVALID,
+
+ VKD3DSIH_COUNT,
};
enum vkd3d_shader_register_type
@@ -1424,6 +1426,7 @@ struct vsir_program
bool use_vocp;
bool has_point_size;
bool has_point_coord;
+ uint8_t diffuse_written_mask;
enum vsir_control_flow_type cf_type;
enum vsir_normalisation_level normalisation_level;
@@ -1442,6 +1445,8 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c
enum vsir_normalisation_level normalisation_level);
enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags,
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context);
+enum vkd3d_result vsir_program_transform_early(struct vsir_program *program, uint64_t config_flags,
+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context);
enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags,
const char *source_name, struct vkd3d_shader_message_context *message_context);
struct vkd3d_shader_src_param *vsir_program_create_outpointid_param(
--
2.45.2