wine-staging/patches/vkd3d-latest/0006-Updated-vkd3d-to-4c03cda3c77123a71590b872acdc216e362.patch

1824 lines
74 KiB
Diff
Raw Normal View History

2024-09-20 16:44:13 -07:00
From c55e0d0e093a7c99c0a45f7084d81b7477a5e025 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Sat, 14 Sep 2024 10:18:09 +1000
Subject: [PATCH] Updated vkd3d to 4c03cda3c77123a71590b872acdc216e3625c109.
---
libs/vkd3d/Makefile.in | 1 +
libs/vkd3d/include/vkd3d_shader.h | 4 +
libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 258 +---------------
libs/vkd3d/libs/vkd3d-shader/dxil.c | 2 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 63 ++--
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 275 ++++++++++++++++-
libs/vkd3d/libs/vkd3d-shader/ir.c | 282 ++++++++++--------
libs/vkd3d/libs/vkd3d-shader/tpf.c | 251 +++++++++++++++-
.../libs/vkd3d-shader/vkd3d_shader_main.c | 9 +
.../libs/vkd3d-shader/vkd3d_shader_private.h | 16 +-
11 files changed, 738 insertions(+), 426 deletions(-)
diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in
index 94e4833dc9a..b073790d986 100644
--- a/libs/vkd3d/Makefile.in
+++ b/libs/vkd3d/Makefile.in
@@ -25,6 +25,7 @@ SOURCES = \
libs/vkd3d-shader/hlsl_codegen.c \
libs/vkd3d-shader/hlsl_constant_ops.c \
libs/vkd3d-shader/ir.c \
+ libs/vkd3d-shader/msl.c \
libs/vkd3d-shader/preproc.l \
libs/vkd3d-shader/preproc.y \
libs/vkd3d-shader/spirv.c \
diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h
index 5737d27c0e9..46feff35138 100644
--- a/libs/vkd3d/include/vkd3d_shader.h
+++ b/libs/vkd3d/include/vkd3d_shader.h
@@ -1087,6 +1087,10 @@ enum vkd3d_shader_target_type
* Output is a raw FX section without container. \since 1.11
*/
VKD3D_SHADER_TARGET_FX,
+ /**
+ * A 'Metal Shading Language' shader. \since 1.14
+ */
+ VKD3D_SHADER_TARGET_MSL,
VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_TARGET_TYPE),
};
diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
index e7d1d2420c6..b69b70c6304 100644
--- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
+++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c
@@ -1272,7 +1272,8 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st
sm1->end = &code[token_count];
/* Estimate instruction count to avoid reallocation in most shaders. */
- if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16))
+ if (!vsir_program_init(program, compile_info, &version,
+ code_size != ~(size_t)0 ? token_count / 4u + 4 : 16, VSIR_CF_STRUCTURED))
return VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name);
@@ -1961,112 +1962,6 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct s
write_sm1_src_register(buffer, &instr->srcs[i]);
};
-static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask)
-{
- src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask);
-}
-
-static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, enum vkd3d_sm1_opcode opcode,
- const struct hlsl_reg *dst, const struct hlsl_reg *src,
- enum vkd3d_shader_src_modifier src_mod, enum vkd3d_shader_dst_modifier dst_mod)
-{
- struct sm1_instruction instr =
- {
- .opcode = opcode,
-
- .dst.type = VKD3DSPR_TEMP,
- .dst.mod = dst_mod,
- .dst.writemask = dst->writemask,
- .dst.reg = dst->id,
- .has_dst = 1,
-
- .srcs[0].type = VKD3DSPR_TEMP,
- .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask),
- .srcs[0].reg = src->id,
- .srcs[0].mod = src_mod,
- .src_count = 1,
- };
-
- sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask);
- d3dbc_write_instruction(d3dbc, &instr);
-}
-
-static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-{
- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
- const struct hlsl_ir_node *arg1 = expr->operands[0].node;
- const struct hlsl_type *dst_type = expr->node.data_type;
- const struct hlsl_type *src_type = arg1->data_type;
- struct hlsl_ctx *ctx = d3dbc->ctx;
-
- /* Narrowing casts were already lowered. */
- VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
-
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- case HLSL_TYPE_BOOL:
- /* Integrals are internally represented as floats, so no change is necessary.*/
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- d3dbc_write_unary_op(d3dbc, VKD3D_SM1_OP_MOV, &instr->reg, &arg1->reg, 0, 0);
- break;
-
- case HLSL_TYPE_DOUBLE:
- hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float.");
- break;
-
- default:
- vkd3d_unreachable();
- }
- break;
-
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- switch(src_type->e.numeric.type)
- {
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not
- * reach this case unless we are missing something. */
- hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer.");
- break;
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- d3dbc_write_unary_op(d3dbc, VKD3D_SM1_OP_MOV, &instr->reg, &arg1->reg, 0, 0);
- break;
-
- case HLSL_TYPE_BOOL:
- hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer.");
- break;
-
- case HLSL_TYPE_DOUBLE:
- hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer.");
- break;
-
- default:
- vkd3d_unreachable();
- }
- break;
-
- case HLSL_TYPE_DOUBLE:
- hlsl_fixme(ctx, &instr->loc, "SM1 cast to double.");
- break;
-
- case HLSL_TYPE_BOOL:
- /* Casts to bool should have already been lowered. */
- default:
- hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.",
- debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type));
- break;
- }
-}
-
static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info_from_vsir(
struct d3dbc_compiler *d3dbc, enum vkd3d_shader_opcode vkd3d_opcode)
{
@@ -2308,6 +2203,9 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str
case VKD3DSIH_MUL:
case VKD3DSIH_SINCOS:
case VKD3DSIH_SLT:
+ case VKD3DSIH_TEX:
+ case VKD3DSIH_TEXKILL:
+ case VKD3DSIH_TEXLDD:
d3dbc_write_vsir_simple_instruction(d3dbc, ins);
break;
@@ -2366,8 +2264,8 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc,
put_u32(buffer, token);
token = (1u << 31);
- token |= usage << D3DSP_DCL_USAGE_SHIFT;
- token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT;
+ token |= usage << VKD3D_SM1_DCL_USAGE_SHIFT;
+ token |= usage_idx << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT;
put_u32(buffer, token);
reg.writemask = element->mask;
@@ -2401,36 +2299,6 @@ static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc)
}
}
-static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-{
- struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
- struct hlsl_ir_node *arg1 = expr->operands[0].node;
- struct hlsl_ctx *ctx = d3dbc->ctx;
-
- VKD3D_ASSERT(instr->reg.allocated);
-
- if (expr->op == HLSL_OP1_REINTERPRET)
- {
- d3dbc_write_unary_op(d3dbc, VKD3D_SM1_OP_MOV, &instr->reg, &arg1->reg, 0, 0);
- return;
- }
-
- if (expr->op == HLSL_OP1_CAST)
- {
- d3dbc_write_cast(d3dbc, instr);
- return;
- }
-
- if (instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT)
- {
- /* These need to be lowered. */
- hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression.");
- return;
- }
-
- hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op));
-}
-
static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block);
static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
@@ -2473,106 +2341,6 @@ static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_no
d3dbc_write_instruction(d3dbc, &sm1_endif);
}
-static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-{
- const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
-
- switch (jump->type)
- {
- case HLSL_IR_JUMP_DISCARD_NEG:
- {
- struct hlsl_reg *reg = &jump->condition.node->reg;
-
- struct sm1_instruction sm1_instr =
- {
- .opcode = VKD3D_SM1_OP_TEXKILL,
-
- .dst.type = VKD3DSPR_TEMP,
- .dst.reg = reg->id,
- .dst.writemask = reg->writemask,
- .has_dst = 1,
- };
-
- d3dbc_write_instruction(d3dbc, &sm1_instr);
- break;
- }
-
- default:
- hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
- }
-}
-
-static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr)
-{
- const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
- struct hlsl_ir_node *coords = load->coords.node;
- struct hlsl_ir_node *ddx = load->ddx.node;
- struct hlsl_ir_node *ddy = load->ddy.node;
- unsigned int sampler_offset, reg_id;
- struct hlsl_ctx *ctx = d3dbc->ctx;
- struct sm1_instruction sm1_instr;
-
- sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource);
- reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset;
-
- sm1_instr = (struct sm1_instruction)
- {
- .dst.type = VKD3DSPR_TEMP,
- .dst.reg = instr->reg.id,
- .dst.writemask = instr->reg.writemask,
- .has_dst = 1,
-
- .srcs[0].type = VKD3DSPR_TEMP,
- .srcs[0].reg = coords->reg.id,
- .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask),
-
- .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER,
- .srcs[1].reg = reg_id,
- .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL),
-
- .src_count = 2,
- };
-
- switch (load->load_type)
- {
- case HLSL_RESOURCE_SAMPLE:
- sm1_instr.opcode = VKD3D_SM1_OP_TEX;
- break;
-
- case HLSL_RESOURCE_SAMPLE_PROJ:
- sm1_instr.opcode = VKD3D_SM1_OP_TEX;
- sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT;
- break;
-
- case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
- sm1_instr.opcode = VKD3D_SM1_OP_TEX;
- sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT;
- break;
-
- case HLSL_RESOURCE_SAMPLE_GRAD:
- sm1_instr.opcode = VKD3D_SM1_OP_TEXLDD;
-
- sm1_instr.srcs[2].type = VKD3DSPR_TEMP;
- sm1_instr.srcs[2].reg = ddx->reg.id;
- sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask);
-
- sm1_instr.srcs[3].type = VKD3DSPR_TEMP;
- sm1_instr.srcs[3].reg = ddy->reg.id;
- sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask);
-
- sm1_instr.src_count += 2;
- break;
-
- default:
- hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type);
- return;
- }
-
- VKD3D_ASSERT(instr->reg.allocated);
-
- d3dbc_write_instruction(d3dbc, &sm1_instr);
-}
-
static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block)
{
struct vkd3d_shader_instruction *vsir_instr;
@@ -2596,10 +2364,6 @@ static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_bl
case HLSL_IR_CALL:
vkd3d_unreachable();
- case HLSL_IR_EXPR:
- d3dbc_write_expr(d3dbc, instr);
- break;
-
case HLSL_IR_IF:
if (hlsl_version_ge(ctx, 2, 1))
d3dbc_write_if(d3dbc, instr);
@@ -2607,14 +2371,6 @@ static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_bl
hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches.");
break;
- case HLSL_IR_JUMP:
- d3dbc_write_jump(d3dbc, instr);
- break;
-
- case HLSL_IR_RESOURCE_LOAD:
- d3dbc_write_resource_load(d3dbc, instr);
- break;
-
case HLSL_IR_VSIR_INSTRUCTION_REF:
vsir_instr_idx = hlsl_ir_vsir_instruction_ref(instr)->vsir_instr_idx;
vsir_instr = &d3dbc->program->instructions.elements[vsir_instr_idx];
diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c
index 1c62a305d30..ee78b6251f9 100644
--- a/libs/vkd3d/libs/vkd3d-shader/dxil.c
+++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c
@@ -10303,7 +10303,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro
/* Estimate instruction count to avoid reallocation in most shaders. */
count = max(token_count, 400) - 400;
- if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10))
+ if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10, VSIR_CF_BLOCKS))
return VKD3D_ERROR_OUT_OF_MEMORY;
vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name);
sm6->ptr = &sm6->start[1];
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
index bdd0e401770..eece693b48c 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h
@@ -22,7 +22,6 @@
#include "vkd3d_shader_private.h"
#include "wine/rbtree.h"
-#include "d3dcommon.h"
#include "d3dx9shader.h"
/* The general IR structure is inspired by Mesa GLSL hir, even though the code
@@ -603,6 +602,8 @@ struct hlsl_ir_function_decl
unsigned int attr_count;
const struct hlsl_attribute *const *attrs;
+ bool early_depth_test;
+
/* Synthetic boolean variable marking whether a return statement has been
* executed. Needed to deal with return statements in non-uniform control
* flow, since some backends can't handle them. */
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index eabf072befb..60e196c63cc 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -1673,25 +1673,36 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct hlsl_block *bl
return expr;
}
-static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
+static bool type_is_integer(enum hlsl_base_type type)
{
- const struct hlsl_type *type = instr->data_type;
- struct vkd3d_string_buffer *string;
-
- switch (type->e.numeric.type)
+ switch (type)
{
case HLSL_TYPE_BOOL:
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
- break;
+ return true;
- default:
- if ((string = hlsl_type_to_string(ctx, type)))
- hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
- "Expression type '%s' is not integer.", string->buffer);
- hlsl_release_string_buffer(ctx, string);
- break;
+ case HLSL_TYPE_DOUBLE:
+ case HLSL_TYPE_FLOAT:
+ case HLSL_TYPE_HALF:
+ return false;
}
+
+ vkd3d_unreachable();
+}
+
+static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
+{
+ const struct hlsl_type *type = instr->data_type;
+ struct vkd3d_string_buffer *string;
+
+ if (type_is_integer(type->e.numeric.type))
+ return;
+
+ if ((string = hlsl_type_to_string(ctx, type)))
+ hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+ "Expression type '%s' is not integer.", string->buffer);
+ hlsl_release_string_buffer(ctx, string);
}
static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
@@ -3033,7 +3044,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx,
{
struct hlsl_type *type = arg->data_type;
- if (type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF)
+ if (!type_is_integer(type->e.numeric.type))
return arg;
type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
@@ -3121,14 +3132,12 @@ static bool elementwise_intrinsic_convert_args(struct hlsl_ctx *ctx,
static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
- enum hlsl_base_type base_type;
struct hlsl_type *type;
if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc)))
return false;
-
- base_type = type->e.numeric.type == HLSL_TYPE_HALF ? HLSL_TYPE_HALF : HLSL_TYPE_FLOAT;
- type = hlsl_get_numeric_type(ctx, type->class, base_type, type->dimx, type->dimy);
+ if (type_is_integer(type->e.numeric.type))
+ type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
return convert_args(ctx, params, type, loc);
}
@@ -3156,6 +3165,7 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx,
const struct parse_initializer *params, const struct vkd3d_shader_location *loc, bool asin_mode)
{
struct hlsl_ir_function_decl *func;
+ struct hlsl_ir_node *arg;
struct hlsl_type *type;
char *body;
@@ -3179,8 +3189,9 @@ static bool write_acos_or_asin(struct hlsl_ctx *ctx,
const char *fn_name = asin_mode ? fn_name_asin : fn_name_acos;
- type = params->args[0]->data_type;
- type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy);
+ if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc)))
+ return false;
+ type = arg->data_type;
if (!(body = hlsl_sprintf_alloc(ctx, template,
type->name, fn_name, type->name,
@@ -3552,9 +3563,8 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx,
struct hlsl_type *cast_type;
enum hlsl_base_type base;
- if (arg1->data_type->e.numeric.type == HLSL_TYPE_HALF && arg2->data_type->e.numeric.type == HLSL_TYPE_HALF)
- base = HLSL_TYPE_HALF;
- else
+ base = expr_common_base_type(arg1->data_type->e.numeric.type, arg2->data_type->e.numeric.type);
+ if (type_is_integer(base))
base = HLSL_TYPE_FLOAT;
cast_type = hlsl_get_vector_type(ctx, base, 3);
@@ -3725,15 +3735,14 @@ static bool intrinsic_determinant(struct hlsl_ctx *ctx,
return false;
}
+ if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc)))
+ return false;
+
dim = min(type->dimx, type->dimy);
if (dim == 1)
- {
- if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc)))
- return false;
return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc);
- }
- typename = type->e.numeric.type == HLSL_TYPE_HALF ? "half" : "float";
+ typename = hlsl_get_scalar_type(ctx, arg->data_type->e.numeric.type)->name;
template = templates[dim];
switch (dim)
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index 2d80b524913..93f19360953 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -6027,7 +6027,7 @@ static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct
"Patch constant function \"%s\" is not defined.", name);
}
-static void parse_entry_function_attributes(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func)
+static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
{
const struct hlsl_profile_info *profile = ctx->profile;
unsigned int i;
@@ -6049,6 +6049,8 @@ static void parse_entry_function_attributes(struct hlsl_ctx *ctx, const struct h
parse_partitioning_attribute(ctx, attr);
else if (!strcmp(attr->name, "patchconstantfunc") && profile->type == VKD3D_SHADER_TYPE_HULL)
parse_patchconstantfunc_attribute(ctx, attr);
+ else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL)
+ entry_func->early_depth_test = true;
else
hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE,
"Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name);
@@ -6684,15 +6686,110 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi
hlsl_replace_node(instr, vsir_instr);
}
+static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_expr *expr)
+{
+ const struct hlsl_type *src_type, *dst_type;
+ const struct hlsl_ir_node *arg1, *instr;
+
+ arg1 = expr->operands[0].node;
+ src_type = arg1->data_type;
+ instr = &expr->node;
+ dst_type = instr->data_type;
+
+ /* Narrowing casts were already lowered. */
+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
+
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ switch (src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ case HLSL_TYPE_BOOL:
+ /* Integrals are internally represented as floats, so no change is necessary.*/
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_DOUBLE:
+ hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float.");
+ break;
+
+ default:
+ vkd3d_unreachable();
+ }
+ break;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ switch(src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not
+ * reach this case unless we are missing something. */
+ hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer.");
+ break;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_BOOL:
+ hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to integer.");
+ break;
+
+ case HLSL_TYPE_DOUBLE:
+ hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer.");
+ break;
+
+ default:
+ vkd3d_unreachable();
+ }
+ break;
+
+ case HLSL_TYPE_DOUBLE:
+ hlsl_fixme(ctx, &instr->loc, "SM1 cast to double.");
+ break;
+
+ case HLSL_TYPE_BOOL:
+ /* Casts to bool should have already been lowered. */
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.",
+ debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type));
+ break;
+ }
+
+ return false;
+}
+
static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_program *program,
struct hlsl_ir_expr *expr)
{
+ struct hlsl_ir_node *instr = &expr->node;
+
+ if (expr->op != HLSL_OP1_REINTERPRET && expr->op != HLSL_OP1_CAST
+ && instr->data_type->e.numeric.type != HLSL_TYPE_FLOAT)
+ {
+ /* These need to be lowered. */
+ hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression.");
+ return false;
+ }
+
switch (expr->op)
{
case HLSL_OP1_ABS:
sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true);
break;
+ case HLSL_OP1_CAST:
+ return sm1_generate_vsir_instr_expr_cast(ctx, program, expr);
+
case HLSL_OP1_COS_REDUCED:
VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0);
sm1_generate_vsir_instr_expr_sincos(ctx, program, expr);
@@ -6722,6 +6819,10 @@ static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_progr
sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RCP);
break;
+ case HLSL_OP1_REINTERPRET:
+ sm1_generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ break;
+
case HLSL_OP1_RSQ:
sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RSQ);
break;
@@ -6858,29 +6959,52 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
unsigned int writemask;
struct hlsl_reg reg;
- reg = hlsl_reg_from_deref(ctx, deref);
- register_index = reg.id;
- writemask = reg.writemask;
+ if (hlsl_type_is_resource(deref->var->data_type))
+ {
+ unsigned int sampler_offset;
+
+ type = VKD3DSPR_COMBINED_SAMPLER;
- if (deref->var->is_uniform)
+ sampler_offset = hlsl_offset_from_deref_safe(ctx, deref);
+ register_index = deref->var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset;
+ writemask = VKD3DSP_WRITEMASK_ALL;
+ }
+ else if (deref->var->is_uniform)
{
- VKD3D_ASSERT(reg.allocated);
type = VKD3DSPR_CONST;
+
+ reg = hlsl_reg_from_deref(ctx, deref);
+ register_index = reg.id;
+ writemask = reg.writemask;
+ VKD3D_ASSERT(reg.allocated);
}
else if (deref->var->is_input_semantic)
{
version.major = ctx->profile->major_version;
version.minor = ctx->profile->minor_version;
version.type = ctx->profile->type;
- if (!hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name,
+ if (hlsl_sm1_register_from_semantic(&version, deref->var->semantic.name,
deref->var->semantic.index, false, &type, &register_index))
{
- VKD3D_ASSERT(reg.allocated);
+ writemask = (1 << deref->var->data_type->dimx) - 1;
+ }
+ else
+ {
type = VKD3DSPR_INPUT;
+
+ reg = hlsl_reg_from_deref(ctx, deref);
register_index = reg.id;
+ writemask = reg.writemask;
+ VKD3D_ASSERT(reg.allocated);
}
- else
- writemask = (1 << deref->var->data_type->dimx) - 1;
+ }
+ else
+ {
+ type = VKD3DSPR_TEMP;
+
+ reg = hlsl_reg_from_deref(ctx, deref);
+ register_index = reg.id;
+ writemask = reg.writemask;
}
vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1);
@@ -6924,6 +7048,91 @@ static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_progr
hlsl_replace_node(instr, vsir_instr);
}
+static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_resource_load *load)
+{
+ struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ struct hlsl_ir_node *coords = load->coords.node;
+ struct hlsl_ir_node *ddx = load->ddx.node;
+ struct hlsl_ir_node *ddy = load->ddy.node;
+ struct hlsl_ir_node *instr = &load->node;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_src_param *src_param;
+ struct vkd3d_shader_instruction *ins;
+ struct hlsl_ir_node *vsir_instr;
+ enum vkd3d_shader_opcode opcode;
+ unsigned int src_count = 2;
+ uint32_t flags = 0;
+
+ VKD3D_ASSERT(instr->reg.allocated);
+
+ switch (load->load_type)
+ {
+ case HLSL_RESOURCE_SAMPLE:
+ opcode = VKD3DSIH_TEX;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_PROJ:
+ opcode = VKD3DSIH_TEX;
+ flags |= VKD3DSI_TEXLD_PROJECT;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
+ opcode = VKD3DSIH_TEX;
+ flags |= VKD3DSI_TEXLD_BIAS;
+ break;
+
+ case HLSL_RESOURCE_SAMPLE_GRAD:
+ opcode = VKD3DSIH_TEXLDD;
+ src_count += 2;
+ break;
+
+ default:
+ hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type);
+ return;
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
+ return;
+ ins->flags = flags;
+
+ dst_param = &ins->dst[0];
+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ dst_param->reg.idx[0].offset = instr->reg.id;
+ dst_param->write_mask = instr->reg.writemask;
+
+ src_param = &ins->src[0];
+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ src_param->reg.idx[0].offset = coords->reg.id;
+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(coords->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+
+ sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource,
+ VKD3DSP_WRITEMASK_ALL, &ins->location);
+
+ if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
+ {
+ src_param = &ins->src[2];
+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ src_param->reg.idx[0].offset = ddx->reg.id;
+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddx->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+
+ src_param = &ins->src[3];
+ vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ src_param->reg.idx[0].offset = ddy->reg.id;
+ src_param->swizzle = sm1_generate_vsir_get_src_swizzle(ddy->reg.writemask, VKD3DSP_WRITEMASK_ALL);
+ }
+
+ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx, instructions->count - 1, instr->data_type,
+ &instr->reg, &instr->loc)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ list_add_before(&instr->entry, &vsir_instr->entry);
+ hlsl_replace_node(instr, vsir_instr);
+}
+
static void sm1_generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program,
struct hlsl_ir_swizzle *swizzle_instr)
{
@@ -6996,6 +7205,42 @@ static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_prog
hlsl_replace_node(instr, vsir_instr);
}
+static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_jump *jump)
+{
+ struct vkd3d_shader_instruction_array *instructions = &program->instructions;
+ struct hlsl_ir_node *condition = jump->condition.node;
+ struct hlsl_ir_node *instr = &jump->node;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_instruction *ins;
+ struct hlsl_ir_node *vsir_instr;
+
+ if (jump->type == HLSL_IR_JUMP_DISCARD_NEG)
+ {
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0)))
+ return;
+
+ dst_param = &ins->dst[0];
+ vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
+ dst_param->reg.idx[0].offset = condition->reg.id;
+ dst_param->write_mask = condition->reg.writemask;
+
+ if (!(vsir_instr = hlsl_new_vsir_instruction_ref(ctx,
+ instructions->count - 1, instr->data_type, NULL, &instr->loc)))
+ {
+ ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+
+ list_add_before(&instr->entry, &vsir_instr->entry);
+ hlsl_replace_node(instr, vsir_instr);
+ }
+ else
+ {
+ hlsl_fixme(ctx, &instr->loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
+ }
+}
+
static bool sm1_generate_vsir_instr(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
struct vsir_program *program = context;
@@ -7009,10 +7254,18 @@ static bool sm1_generate_vsir_instr(struct hlsl_ctx *ctx, struct hlsl_ir_node *i
case HLSL_IR_EXPR:
return sm1_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr));
+ case HLSL_IR_JUMP:
+ sm1_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr));
+ return true;
+
case HLSL_IR_LOAD:
sm1_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr));
return true;
+ case HLSL_IR_RESOURCE_LOAD:
+ sm1_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr));
+ return true;
+
case HLSL_IR_STORE:
sm1_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr));
return true;
@@ -7041,7 +7294,7 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
version.major = ctx->profile->major_version;
version.minor = ctx->profile->minor_version;
version.type = ctx->profile->type;
- if (!vsir_program_init(program, NULL, &version, 0))
+ if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED))
{
ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
return;
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index 6cef85fdc84..4b79a058b6f 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -74,7 +74,7 @@ static int convert_parameter_info(const struct vkd3d_shader_compile_info *compil
}
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info,
- const struct vkd3d_shader_version *version, unsigned int reserve)
+ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type)
{
memset(program, 0, sizeof(*program));
@@ -96,6 +96,7 @@ bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_c
}
program->shader_version = *version;
+ program->cf_type = cf_type;
return shader_instruction_array_init(&program->instructions, reserve);
}
@@ -2803,6 +2804,8 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi
struct cf_flattener flattener = {.program = program};
enum vkd3d_result result;
+ VKD3D_ASSERT(program->cf_type == VSIR_CF_STRUCTURED);
+
if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0)
{
vkd3d_free(program->instructions.elements);
@@ -2810,6 +2813,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi
program->instructions.capacity = flattener.instruction_capacity;
program->instructions.count = flattener.instruction_count;
program->block_count = flattener.block_id;
+ program->cf_type = VSIR_CF_BLOCKS;
}
else
{
@@ -2877,6 +2881,8 @@ static enum vkd3d_result vsir_program_lower_switch_to_selection_ladder(struct vs
struct vkd3d_shader_instruction *instructions = NULL;
struct lower_switch_to_if_ladder_block_mapping *block_map = NULL;
+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
+
if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count))
goto fail;
@@ -3069,6 +3075,8 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_
struct ssas_to_temps_alloc alloc = {0};
unsigned int current_label = 0;
+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
+
if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info))))
{
ERR("Failed to allocate block info array.\n");
@@ -5289,6 +5297,8 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program,
enum vkd3d_result ret;
size_t i;
+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
+
target.jump_target_temp_idx = program->temp_count;
target.temp_count = program->temp_count + 1;
@@ -5336,6 +5346,7 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program,
program->instructions.capacity = target.ins_capacity;
program->instructions.count = target.ins_count;
program->temp_count = target.temp_count;
+ program->cf_type = VSIR_CF_STRUCTURED;
return VKD3D_OK;
@@ -5469,6 +5480,8 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru
enum vkd3d_result ret;
size_t i;
+ VKD3D_ASSERT(program->cf_type == VSIR_CF_BLOCKS);
+
for (i = 0; i < program->instructions.count;)
{
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
@@ -5701,12 +5714,6 @@ struct validation_context
enum vkd3d_result status;
bool dcl_temps_found;
enum vkd3d_shader_opcode phase;
- enum cf_type
- {
- CF_TYPE_UNKNOWN = 0,
- CF_TYPE_STRUCTURED,
- CF_TYPE_BLOCKS,
- } cf_type;
bool inside_block;
struct validation_context_temp_data
@@ -6119,13 +6126,13 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx,
return true;
}
-static const char *name_from_cf_type(enum cf_type type)
+static const char *name_from_cf_type(enum vsir_control_flow_type type)
{
switch (type)
{
- case CF_TYPE_STRUCTURED:
+ case VSIR_CF_STRUCTURED:
return "structured";
- case CF_TYPE_BLOCKS:
+ case VSIR_CF_BLOCKS:
return "block-based";
default:
vkd3d_unreachable();
@@ -6133,15 +6140,122 @@ static const char *name_from_cf_type(enum cf_type type)
}
static void vsir_validate_cf_type(struct validation_context *ctx,
- const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type)
+ const struct vkd3d_shader_instruction *instruction, enum vsir_control_flow_type expected_type)
{
- VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN);
- VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN);
- if (ctx->cf_type != expected_type)
+ if (ctx->program->cf_type != expected_type)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.",
- instruction->opcode, name_from_cf_type(ctx->cf_type));
+ instruction->opcode, name_from_cf_type(ctx->program->cf_type));
+}
+
+static void vsir_validator_push_block(struct validation_context *ctx, enum vkd3d_shader_opcode opcode)
+{
+ if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
+ {
+ ctx->status = VKD3D_ERROR_OUT_OF_MEMORY;
+ return;
+ }
+ ctx->blocks[ctx->depth++] = opcode;
+}
+
+static void vsir_validate_dcl_temps(struct validation_context *ctx,
+ const struct vkd3d_shader_instruction *instruction)
+{
+ if (ctx->dcl_temps_found)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS,
+ "Duplicate DCL_TEMPS instruction.");
+ if (instruction->declaration.count > ctx->program->temp_count)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS,
+ "Invalid DCL_TEMPS count %u, expected at most %u.",
+ instruction->declaration.count, ctx->program->temp_count);
+ ctx->dcl_temps_found = true;
+}
+
+static void vsir_validate_else(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
+ "ELSE instruction doesn't terminate IF block.");
+ else
+ ctx->blocks[ctx->depth - 1] = VKD3DSIH_ELSE;
+}
+
+static void vsir_validate_endif(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF
+ && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE))
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
+ "ENDIF instruction doesn't terminate IF/ELSE block.");
+ else
+ --ctx->depth;
+}
+
+static void vsir_validate_endloop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
+ "ENDLOOP instruction doesn't terminate LOOP block.");
+ else
+ --ctx->depth;
+}
+
+static void vsir_validate_endrep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP)
+ validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
+ "ENDREP instruction doesn't terminate REP block.");
+ else
+ --ctx->depth;
+}
+
+static void vsir_validate_if(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ vsir_validator_push_block(ctx, VKD3DSIH_IF);
+}
+
+static void vsir_validate_ifc(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ vsir_validator_push_block(ctx, VKD3DSIH_IF);
+}
+
+static void vsir_validate_loop(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ vsir_validate_src_count(ctx, instruction, ctx->program->shader_version.major <= 3 ? 2 : 0);
+ vsir_validator_push_block(ctx, VKD3DSIH_LOOP);
}
+static void vsir_validate_rep(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction)
+{
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
+ vsir_validator_push_block(ctx, VKD3DSIH_REP);
+}
+
+struct vsir_validator_instruction_desc
+{
+ unsigned int dst_param_count;
+ unsigned int src_param_count;
+ void (*validate)(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction);
+};
+
+static const struct vsir_validator_instruction_desc vsir_validator_instructions[] =
+{
+ [VKD3DSIH_DCL_TEMPS] = {0, 0, vsir_validate_dcl_temps},
+ [VKD3DSIH_ELSE] = {0, 0, vsir_validate_else},
+ [VKD3DSIH_ENDIF] = {0, 0, vsir_validate_endif},
+ [VKD3DSIH_ENDLOOP] = {0, 0, vsir_validate_endloop},
+ [VKD3DSIH_ENDREP] = {0, 0, vsir_validate_endrep},
+ [VKD3DSIH_IF] = {0, 1, vsir_validate_if},
+ [VKD3DSIH_IFC] = {0, 2, vsir_validate_ifc},
+ [VKD3DSIH_LOOP] = {0, ~0u, vsir_validate_loop},
+ [VKD3DSIH_REP] = {0, 1, vsir_validate_rep},
+};
+
static void vsir_validate_instruction(struct validation_context *ctx)
{
const struct vkd3d_shader_version *version = &ctx->program->shader_version;
@@ -6254,24 +6368,8 @@ static void vsir_validate_instruction(struct validation_context *ctx)
"Instruction %#x appear before any phase instruction in a hull shader.",
instruction->opcode);
- /* We support two different control flow types in shaders:
- * block-based, like DXIL and SPIR-V, and structured, like D3DBC
- * and TPF. The shader is detected as block-based when its first
- * instruction, except for NOP, DCL_* and phases, is a LABEL.
- * Currently we mandate that each shader is either purely block-based or
- * purely structured. In principle we could allow structured
- * constructs in a block, provided they are confined in a single
- * block, but need for that hasn't arisen yet, so we don't. */
- if (ctx->cf_type == CF_TYPE_UNKNOWN && instruction->opcode != VKD3DSIH_NOP
- && !vsir_instruction_is_dcl(instruction))
- {
- if (instruction->opcode == VKD3DSIH_LABEL)
- ctx->cf_type = CF_TYPE_BLOCKS;
- else
- ctx->cf_type = CF_TYPE_STRUCTURED;
- }
-
- if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction))
+ if (ctx->program->cf_type == VSIR_CF_BLOCKS && !vsir_instruction_is_dcl(instruction)
+ && instruction->opcode != VKD3DSIH_NOP)
{
switch (instruction->opcode)
{
@@ -6300,98 +6398,26 @@ static void vsir_validate_instruction(struct validation_context *ctx)
}
}
- switch (instruction->opcode)
+ if (instruction->opcode < ARRAY_SIZE(vsir_validator_instructions))
{
- case VKD3DSIH_DCL_TEMPS:
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 0);
- if (ctx->dcl_temps_found)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction.");
- if (instruction->declaration.count > ctx->program->temp_count)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS,
- "Invalid DCL_TEMPS count %u, expected at most %u.",
- instruction->declaration.count, ctx->program->temp_count);
- ctx->dcl_temps_found = true;
- break;
-
- case VKD3DSIH_IF:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 1);
- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
- return;
- ctx->blocks[ctx->depth++] = instruction->opcode;
- break;
+ const struct vsir_validator_instruction_desc *desc;
- case VKD3DSIH_IFC:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 2);
- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
- return;
- ctx->blocks[ctx->depth++] = VKD3DSIH_IF;
- break;
+ desc = &vsir_validator_instructions[instruction->opcode];
- case VKD3DSIH_ELSE:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 0);
- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block.");
- else
- ctx->blocks[ctx->depth - 1] = instruction->opcode;
- break;
-
- case VKD3DSIH_ENDIF:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 0);
- if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE))
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block.");
- else
- --ctx->depth;
- break;
-
- case VKD3DSIH_LOOP:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0);
- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
- return;
- ctx->blocks[ctx->depth++] = instruction->opcode;
- break;
-
- case VKD3DSIH_ENDLOOP:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 0);
- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block.");
- else
- --ctx->depth;
- break;
-
- case VKD3DSIH_REP:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 1);
- if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
- return;
- ctx->blocks[ctx->depth++] = instruction->opcode;
- break;
-
- case VKD3DSIH_ENDREP:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
- vsir_validate_dst_count(ctx, instruction, 0);
- vsir_validate_src_count(ctx, instruction, 0);
- if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP)
- validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block.");
- else
- --ctx->depth;
- break;
+ if (desc->validate)
+ {
+ if (desc->dst_param_count != ~0u)
+ vsir_validate_dst_count(ctx, instruction, desc->dst_param_count);
+ if (desc->src_param_count != ~0u)
+ vsir_validate_src_count(ctx, instruction, desc->src_param_count);
+ desc->validate(ctx, instruction);
+ }
+ }
+ switch (instruction->opcode)
+ {
case VKD3DSIH_SWITCH:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 1);
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
@@ -6400,7 +6426,7 @@ static void vsir_validate_instruction(struct validation_context *ctx)
break;
case VKD3DSIH_ENDSWITCH:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH)
@@ -6415,7 +6441,7 @@ static void vsir_validate_instruction(struct validation_context *ctx)
break;
case VKD3DSIH_LABEL:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 1);
if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg))
@@ -6425,7 +6451,7 @@ static void vsir_validate_instruction(struct validation_context *ctx)
break;
case VKD3DSIH_BRANCH:
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 0);
if (!vsir_validate_src_min_count(ctx, instruction, 1))
break;
@@ -6465,7 +6491,7 @@ static void vsir_validate_instruction(struct validation_context *ctx)
{
unsigned int case_count;
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 0);
/* Parameters are source, default label, merge label and
* then pairs of constant value and case label. */
@@ -6510,7 +6536,7 @@ static void vsir_validate_instruction(struct validation_context *ctx)
{
unsigned int incoming_count;
- vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
+ vsir_validate_cf_type(ctx, instruction, VSIR_CF_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 1);
vsir_validate_src_min_count(ctx, instruction, 2);
if (instruction->src_count % 2 != 0)
@@ -6590,7 +6616,8 @@ enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t c
if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas))))
goto fail;
- for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx)
+ for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count
+ && ctx.status != VKD3D_ERROR_OUT_OF_MEMORY; ++ctx.instruction_idx)
vsir_validate_instruction(&ctx);
ctx.invalid_instruction_idx = true;
@@ -6685,7 +6712,8 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t
vsir_transform(&ctx, vsir_program_remove_dead_code);
vsir_transform(&ctx, vsir_program_normalise_combined_samplers);
- if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL)
+ if (compile_info->target_type != VKD3D_SHADER_TARGET_GLSL
+ && compile_info->target_type != VKD3D_SHADER_TARGET_MSL)
vsir_transform(&ctx, vsir_program_flatten_control_flow_constructs);
}
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index b76a596bb60..a9d6c9e7c13 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -23,6 +23,7 @@
#include "hlsl.h"
#include "vkd3d_shader_private.h"
+#include "d3dcommon.h"
#define SM4_MAX_SRC_COUNT 6
#define SM4_MAX_DST_COUNT 2
@@ -616,6 +617,33 @@ enum vkd3d_sm4_shader_data_type
VKD3D_SM4_SHADER_DATA_MESSAGE = 0x4,
};
+enum vkd3d_sm4_stat_field
+{
+ VKD3D_STAT_UNUSED = 0,
+ VKD3D_STAT_INSTR_COUNT,
+ VKD3D_STAT_MOV,
+ VKD3D_STAT_MOVC,
+ VKD3D_STAT_CONV,
+ VKD3D_STAT_FLOAT,
+ VKD3D_STAT_INT,
+ VKD3D_STAT_UINT,
+ VKD3D_STAT_EMIT,
+ VKD3D_STAT_CUT,
+ VKD3D_STAT_SAMPLE,
+ VKD3D_STAT_SAMPLE_C,
+ VKD3D_STAT_SAMPLE_GRAD,
+ VKD3D_STAT_SAMPLE_BIAS,
+ VKD3D_STAT_LOAD,
+ VKD3D_STAT_STORE,
+ VKD3D_STAT_COUNT,
+};
+
+struct vkd3d_sm4_stat_field_info
+{
+ enum vkd3d_sm4_opcode opcode;
+ enum vkd3d_sm4_stat_field field;
+};
+
struct sm4_index_range
{
unsigned int index;
@@ -634,6 +662,7 @@ struct vkd3d_sm4_lookup_tables
const struct vkd3d_sm4_opcode_info *opcode_info_from_sm4[VKD3D_SM4_OP_COUNT];
const struct vkd3d_sm4_register_type_info *register_type_info_from_sm4[VKD3D_SM4_REGISTER_TYPE_COUNT];
const struct vkd3d_sm4_register_type_info *register_type_info_from_vkd3d[VKD3DSPR_COUNT];
+ const struct vkd3d_sm4_stat_field_info *stat_field_from_sm4[VKD3D_SM4_OP_COUNT];
};
struct vkd3d_shader_sm4_parser
@@ -1330,11 +1359,17 @@ static const enum vkd3d_shader_register_precision register_precision_table[] =
/* VKD3D_SM4_REGISTER_PRECISION_MIN_UINT_16 */ VKD3D_SHADER_REGISTER_PRECISION_MIN_UINT_16,
};
+struct sm4_stat
+{
+ uint32_t fields[VKD3D_STAT_COUNT];
+};
+
struct tpf_writer
{
struct hlsl_ctx *ctx;
struct vkd3d_bytecode_buffer *buffer;
struct vkd3d_sm4_lookup_tables lookup;
+ struct sm4_stat *stat;
};
static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
@@ -1662,6 +1697,120 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
{VKD3D_SM5_RT_OUTPUT_STENCIL_REF, VKD3DSPR_OUTSTENCILREF, VKD3D_SM4_SWIZZLE_VEC4},
};
+ static const struct vkd3d_sm4_stat_field_info stat_field_table[] =
+ {
+ {VKD3D_SM4_OP_MOV, VKD3D_STAT_MOV},
+ {VKD3D_SM4_OP_MOVC, VKD3D_STAT_MOVC},
+ {VKD3D_SM5_OP_DMOV, VKD3D_STAT_MOV},
+ {VKD3D_SM5_OP_DMOVC, VKD3D_STAT_MOVC},
+
+ {VKD3D_SM4_OP_ITOF, VKD3D_STAT_CONV},
+ {VKD3D_SM4_OP_FTOI, VKD3D_STAT_CONV},
+ {VKD3D_SM4_OP_FTOU, VKD3D_STAT_CONV},
+ {VKD3D_SM4_OP_UTOF, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_DTOU, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_UTOD, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_DTOF, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_FTOD, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_DTOI, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_ITOD, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_F32TOF16, VKD3D_STAT_CONV},
+ {VKD3D_SM5_OP_F16TOF32, VKD3D_STAT_CONV},
+
+ {VKD3D_SM4_OP_ADD, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_DIV, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_DP2, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_DP3, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_DP4, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_EQ, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_EXP, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_FRC, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_GE, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_LT, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_MAD, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_MIN, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_MAX, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_MUL, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_NE, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_ROUND_NE, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_ROUND_NI, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_ROUND_PI, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_ROUND_Z, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_RSQ, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_SQRT, VKD3D_STAT_FLOAT},
+ {VKD3D_SM4_OP_SINCOS, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_RCP, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DADD, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DMAX, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DMIN, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DMUL, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DEQ, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DGE, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DLT, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DNE, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DDIV, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DFMA, VKD3D_STAT_FLOAT},
+ {VKD3D_SM5_OP_DRCP, VKD3D_STAT_FLOAT},
+
+ {VKD3D_SM4_OP_IADD, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_IEQ, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_IGE, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_ILT, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_IMAD, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_IMAX, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_IMIN, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_IMUL, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_INE, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_INEG, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_ISHL, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_ISHR, VKD3D_STAT_INT},
+ {VKD3D_SM4_OP_ITOF, VKD3D_STAT_INT},
+
+ {VKD3D_SM4_OP_UDIV, VKD3D_STAT_UINT},
+ {VKD3D_SM4_OP_ULT, VKD3D_STAT_UINT},
+ {VKD3D_SM4_OP_UGE, VKD3D_STAT_UINT},
+ {VKD3D_SM4_OP_UMUL, VKD3D_STAT_UINT},
+ {VKD3D_SM4_OP_UMAX, VKD3D_STAT_UINT},
+ {VKD3D_SM4_OP_UMIN, VKD3D_STAT_UINT},
+ {VKD3D_SM4_OP_USHR, VKD3D_STAT_UINT},
+
+ {VKD3D_SM4_OP_EMIT, VKD3D_STAT_EMIT},
+ {VKD3D_SM4_OP_CUT, VKD3D_STAT_CUT},
+ {VKD3D_SM5_OP_EMIT_STREAM, VKD3D_STAT_EMIT},
+ {VKD3D_SM5_OP_CUT_STREAM, VKD3D_STAT_CUT},
+
+ {VKD3D_SM4_OP_SAMPLE, VKD3D_STAT_SAMPLE},
+ {VKD3D_SM4_OP_SAMPLE_LOD, VKD3D_STAT_SAMPLE},
+ {VKD3D_SM5_OP_SAMPLE_LOD_S, VKD3D_STAT_SAMPLE},
+ {VKD3D_SM5_OP_SAMPLE_CL_S, VKD3D_STAT_SAMPLE},
+ {VKD3D_SM4_OP_GATHER4, VKD3D_STAT_SAMPLE},
+ {VKD3D_SM5_OP_GATHER4_PO, VKD3D_STAT_SAMPLE},
+ {VKD3D_SM4_OP_SAMPLE_C, VKD3D_STAT_SAMPLE_C},
+ {VKD3D_SM4_OP_SAMPLE_C_LZ, VKD3D_STAT_SAMPLE_C},
+ {VKD3D_SM5_OP_SAMPLE_C_LZ_S, VKD3D_STAT_SAMPLE_C},
+ {VKD3D_SM5_OP_SAMPLE_C_CL_S, VKD3D_STAT_SAMPLE_C},
+ {VKD3D_SM5_OP_GATHER4_C, VKD3D_STAT_SAMPLE_C},
+ {VKD3D_SM5_OP_GATHER4_PO_C, VKD3D_STAT_SAMPLE_C},
+ {VKD3D_SM4_OP_SAMPLE_GRAD, VKD3D_STAT_SAMPLE_GRAD},
+ {VKD3D_SM5_OP_SAMPLE_GRAD_CL_S, VKD3D_STAT_SAMPLE_GRAD},
+ {VKD3D_SM4_OP_SAMPLE_B, VKD3D_STAT_SAMPLE_BIAS},
+
+ {VKD3D_SM4_OP_LD, VKD3D_STAT_LOAD},
+ {VKD3D_SM4_OP_LD2DMS, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD_UAV_TYPED, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD_RAW, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD_STRUCTURED, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD_S, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD2DMS_S, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD_UAV_TYPED_S, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD_RAW_S, VKD3D_STAT_LOAD},
+ {VKD3D_SM5_OP_LD_STRUCTURED_S, VKD3D_STAT_LOAD},
+
+ {VKD3D_SM5_OP_STORE_UAV_TYPED, VKD3D_STAT_STORE},
+ {VKD3D_SM5_OP_STORE_RAW, VKD3D_STAT_STORE},
+ {VKD3D_SM5_OP_STORE_STRUCTURED,VKD3D_STAT_STORE},
+ };
+
memset(lookup, 0, sizeof(*lookup));
for (i = 0; i < ARRAY_SIZE(opcode_table); ++i)
@@ -1678,12 +1827,21 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
lookup->register_type_info_from_sm4[info->sm4_type] = info;
lookup->register_type_info_from_vkd3d[info->vkd3d_type] = info;
}
+
+ for (i = 0; i < ARRAY_SIZE(stat_field_table); ++i)
+ {
+ const struct vkd3d_sm4_stat_field_info *info = &stat_field_table[i];
+
+ lookup->stat_field_from_sm4[info->opcode] = info;
+ }
}
-static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
+static void tpf_writer_init(struct tpf_writer *tpf, struct hlsl_ctx *ctx, struct sm4_stat *stat,
+ struct vkd3d_bytecode_buffer *buffer)
{
tpf->ctx = ctx;
tpf->buffer = buffer;
+ tpf->stat = stat;
init_sm4_lookup_tables(&tpf->lookup);
}
@@ -1721,6 +1879,16 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type(
return register_type_info->default_src_swizzle_type;
}
+static enum vkd3d_sm4_stat_field get_stat_field_from_sm4_opcode(
+ const struct vkd3d_sm4_lookup_tables *lookup, enum vkd3d_sm4_opcode sm4_opcode)
+{
+ const struct vkd3d_sm4_stat_field_info *field_info;
+
+ if (sm4_opcode >= VKD3D_SM4_OP_COUNT || !(field_info = lookup->stat_field_from_sm4[sm4_opcode]))
+ return VKD3D_STAT_UNUSED;
+ return field_info->field;
+}
+
static enum vkd3d_data_type map_data_type(char t)
{
switch (t)
@@ -2553,7 +2721,7 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro
version.minor = VKD3D_SM4_VERSION_MINOR(version_token);
/* Estimate instruction count to avoid reallocation in most shaders. */
- if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20))
+ if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20, VSIR_CF_STRUCTURED))
return false;
vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name);
sm4->ptr = sm4->start;
@@ -4187,6 +4355,7 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4
{
struct vkd3d_bytecode_buffer *buffer = tpf->buffer;
uint32_t token = instr->opcode | instr->extra_bits;
+ enum vkd3d_sm4_stat_field stat_field;
unsigned int size, i, j;
size_t token_position;
@@ -4219,6 +4388,11 @@ static void write_sm4_instruction(const struct tpf_writer *tpf, const struct sm4
size = (bytecode_get_size(buffer) - token_position) / sizeof(uint32_t);
token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT);
set_u32(buffer, token_position, token);
+
+ ++tpf->stat->fields[VKD3D_STAT_INSTR_COUNT];
+
+ stat_field = get_stat_field_from_sm4_opcode(&tpf->lookup, instr->opcode & VKD3D_SM4_OPCODE_MASK);
+ ++tpf->stat->fields[stat_field];
}
static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr,
@@ -4579,6 +4753,17 @@ static void write_sm4_dcl_thread_group(const struct tpf_writer *tpf, const uint3
write_sm4_instruction(tpf, &instr);
}
+static void write_sm4_dcl_global_flags(const struct tpf_writer *tpf, uint32_t flags)
+{
+ struct sm4_instruction instr =
+ {
+ .opcode = VKD3D_SM4_OP_DCL_GLOBAL_FLAGS,
+ .extra_bits = flags << VKD3D_SM4_GLOBAL_FLAGS_SHIFT,
+ };
+
+ write_sm4_instruction(tpf, &instr);
+}
+
static void write_sm4_ret(const struct tpf_writer *tpf)
{
struct sm4_instruction instr =
@@ -6017,8 +6202,8 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc
}
}
-static void write_sm4_shdr(struct hlsl_ctx *ctx,
- const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc)
+static void write_sm4_shdr(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func,
+ struct sm4_stat *stat, struct dxbc_writer *dxbc)
{
const struct hlsl_profile_info *profile = ctx->profile;
struct vkd3d_bytecode_buffer buffer = {0};
@@ -6043,7 +6228,7 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
VKD3D_SM4_LIB,
};
- tpf_writer_init(&tpf, ctx, &buffer);
+ tpf_writer_init(&tpf, ctx, stat, &buffer);
extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
@@ -6068,6 +6253,9 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx,
write_sm4_dcl_textures(&tpf, resource, true);
}
+ if (entry_func->early_depth_test && profile->major_version >= 5)
+ write_sm4_dcl_global_flags(&tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL);
+
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
{
if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write))
@@ -6135,8 +6323,58 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
vkd3d_free(flags);
}
+static void write_sm4_stat(struct hlsl_ctx *ctx, const struct sm4_stat *stat, struct dxbc_writer *dxbc)
+{
+ struct vkd3d_bytecode_buffer buffer = {0};
+
+ put_u32(&buffer, stat->fields[VKD3D_STAT_INSTR_COUNT]);
+ put_u32(&buffer, 0); /* Temp count */
+ put_u32(&buffer, 0); /* Def count */
+ put_u32(&buffer, 0); /* DCL count */
+ put_u32(&buffer, stat->fields[VKD3D_STAT_FLOAT]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_INT]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_UINT]);
+ put_u32(&buffer, 0); /* Static flow control count */
+ put_u32(&buffer, 0); /* Dynamic flow control count */
+ put_u32(&buffer, 0); /* Macro instruction count */
+ put_u32(&buffer, 0); /* Temp array count */
+ put_u32(&buffer, 0); /* Array instr count */
+ put_u32(&buffer, stat->fields[VKD3D_STAT_CUT]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_EMIT]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_LOAD]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_C]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_BIAS]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_SAMPLE_GRAD]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_MOV]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_MOVC]);
+ put_u32(&buffer, stat->fields[VKD3D_STAT_CONV]);
+ put_u32(&buffer, 0); /* Bitwise instructions */
+ put_u32(&buffer, 0); /* Input primitive */
+ put_u32(&buffer, 0); /* GS output topology */
+ put_u32(&buffer, 0); /* GS max output vertex count */
+ put_u32(&buffer, 0); /* Unknown */
+ put_u32(&buffer, 0); /* Unknown */
+ put_u32(&buffer, 0); /* Sample frequency */
+
+ if (hlsl_version_ge(ctx, 5, 0))
+ {
+ put_u32(&buffer, 0); /* GS instance count */
+ put_u32(&buffer, 0); /* Control point count */
+ put_u32(&buffer, 0); /* HS output primitive */
+ put_u32(&buffer, 0); /* HS partitioning */
+ put_u32(&buffer, 0); /* Tessellator domain */
+ put_u32(&buffer, 0); /* Barrier instructions */
+ put_u32(&buffer, 0); /* Interlocked instructions */
+ put_u32(&buffer, stat->fields[VKD3D_STAT_STORE]);
+ }
+
+ add_section(ctx, dxbc, TAG_STAT, &buffer);
+}
+
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
{
+ struct sm4_stat stat = {0};
struct dxbc_writer dxbc;
size_t i;
int ret;
@@ -6146,8 +6384,9 @@ int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_fun
write_sm4_signature(ctx, &dxbc, false);
write_sm4_signature(ctx, &dxbc, true);
write_sm4_rdef(ctx, &dxbc);
- write_sm4_shdr(ctx, entry_func, &dxbc);
+ write_sm4_shdr(ctx, entry_func, &stat, &dxbc);
write_sm4_sfi0(ctx, &dxbc);
+ write_sm4_stat(ctx, &stat, &dxbc);
if (!(ret = ctx->result))
ret = dxbc_writer_write(&dxbc, out);
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
index 60be996ae24..ee98a504a5b 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c
@@ -535,6 +535,8 @@ static const char *shader_get_target_type_suffix(enum vkd3d_shader_target_type t
return "glsl";
case VKD3D_SHADER_TARGET_FX:
return "fx";
+ case VKD3D_SHADER_TARGET_MSL:
+ return "msl";
default:
FIXME("Unhandled target type %#x.\n", type);
return "bin";
@@ -1646,6 +1648,10 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info);
break;
+ case VKD3D_SHADER_TARGET_MSL:
+ ret = msl_compile(program, config_flags, compile_info, message_context);
+ break;
+
default:
/* Validation should prevent us from reaching this. */
vkd3d_unreachable();
@@ -1945,6 +1951,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types(
VKD3D_SHADER_TARGET_D3D_ASM,
#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL
VKD3D_SHADER_TARGET_GLSL,
+#endif
+#ifdef VKD3D_SHADER_UNSUPPORTED_MSL
+ VKD3D_SHADER_TARGET_MSL,
#endif
};
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index bc369ec6866..8866780132e 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -238,6 +238,8 @@ enum vkd3d_shader_error
VKD3D_SHADER_ERROR_VSIR_INVALID_GS = 9019,
VKD3D_SHADER_WARNING_VSIR_DYNAMIC_DESCRIPTOR_ARRAY = 9300,
+
+ VKD3D_SHADER_ERROR_MSL_INTERNAL = 10000,
};
enum vkd3d_shader_opcode
@@ -1362,6 +1364,12 @@ enum vkd3d_shader_config_flags
VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION = 0x00000001,
};
+enum vsir_control_flow_type
+{
+ VSIR_CF_STRUCTURED,
+ VSIR_CF_BLOCKS,
+};
+
struct vsir_program
{
struct vkd3d_shader_version shader_version;
@@ -1381,6 +1389,7 @@ struct vsir_program
unsigned int temp_count;
unsigned int ssa_count;
bool use_vocp;
+ enum vsir_control_flow_type cf_type;
const char **block_names;
size_t block_name_count;
@@ -1393,7 +1402,7 @@ int vsir_program_compile(struct vsir_program *program, uint64_t config_flags,
const struct vkd3d_shader_parameter1 *vsir_program_get_parameter(
const struct vsir_program *program, enum vkd3d_shader_parameter_name name);
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info,
- const struct vkd3d_shader_version *version, unsigned int reserve);
+ const struct vkd3d_shader_version *version, unsigned int reserve, enum vsir_control_flow_type cf_type);
enum vkd3d_result vsir_program_transform(struct vsir_program *program, uint64_t config_flags,
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context);
enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags,
@@ -1593,6 +1602,9 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags,
const struct vkd3d_shader_compile_info *compile_info,
struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context);
+int msl_compile(struct vsir_program *program, uint64_t config_flags,
+ const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context);
+
enum vkd3d_md5_variant
{
VKD3D_MD5_STANDARD,
@@ -1872,7 +1884,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain,
#define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t))
#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t)
-#define DXBC_MAX_SECTION_COUNT 5
+#define DXBC_MAX_SECTION_COUNT 6
struct dxbc_writer
{
--
2.45.2