From e19754650a2fae6ebfb0a471026cc5e9683a8237 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Tue, 23 Sep 2025 07:11:30 +1000 Subject: [PATCH] Updated vkd3d to cb8c851bec08a9483ce52f57c71922c9b145b9d3. --- libs/vkd3d/include/private/vkd3d_common.h | 18 ++ libs/vkd3d/libs/vkd3d-shader/hlsl.c | 9 + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 6 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 101 ++++++++++ libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 183 ++++++++++++++++++ .../libs/vkd3d-shader/hlsl_constant_ops.c | 106 ++++++++++ libs/vkd3d/libs/vkd3d-shader/ir.c | 64 ++++++ libs/vkd3d/libs/vkd3d-shader/spirv.c | 20 +- libs/vkd3d/libs/vkd3d-shader/tpf.c | 4 + 9 files changed, 504 insertions(+), 7 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 8b63acf68e1..c2e957a2fea 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -343,6 +343,24 @@ static inline unsigned int vkd3d_log2i(unsigned int x) #endif } +static inline unsigned int vkd3d_ctz(uint32_t v) +{ +#ifdef HAVE_BUILTIN_CTZ + return __builtin_ctz(v); +#else + unsigned int c = 31; + + v &= -v; + c = (v & 0x0000ffff) ? c - 16 : c; + c = (v & 0x00ff00ff) ? c - 8 : c; + c = (v & 0x0f0f0f0f) ? c - 4 : c; + c = (v & 0x33333333) ? c - 2 : c; + c = (v & 0x55555555) ? c - 1 : c; + + return c; +#endif +} + static inline void *vkd3d_memmem( const void *haystack, size_t haystack_len, const void *needle, size_t needle_len) { const char *str = haystack; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 5a25efdee75..ec1e27d9496 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -393,6 +393,11 @@ bool hlsl_type_is_signed_integer(const struct hlsl_type *type) vkd3d_unreachable(); } +bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type) +{ + return hlsl_type_is_integer(type) && !hlsl_type_is_signed_integer(type); +} + bool hlsl_type_is_integer(const struct hlsl_type *type) { VKD3D_ASSERT(hlsl_is_numeric_type(type)); @@ -3724,8 +3729,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_BIT_NOT] = "~", [HLSL_OP1_CAST] = "cast", [HLSL_OP1_CEIL] = "ceil", + [HLSL_OP1_CLZ] = "clz", [HLSL_OP1_COS] = "cos", [HLSL_OP1_COS_REDUCED] = "cos_reduced", + [HLSL_OP1_COUNTBITS] = "countbits", + [HLSL_OP1_CTZ] = "ctz", [HLSL_OP1_DSX] = "dsx", [HLSL_OP1_DSX_COARSE] = "dsx_coarse", [HLSL_OP1_DSX_FINE] = "dsx_fine", @@ -3735,6 +3743,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_F16TOF32] = "f16tof32", [HLSL_OP1_F32TOF16] = "f32tof16", + [HLSL_OP1_FIND_MSB] = "find_msb", [HLSL_OP1_FLOOR] = "floor", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_ISINF] = "isinf", diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index a3e8ccc1e2a..8dbfd062177 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -718,8 +718,11 @@ enum hlsl_ir_expr_op HLSL_OP1_BIT_NOT, HLSL_OP1_CAST, HLSL_OP1_CEIL, + HLSL_OP1_CLZ, HLSL_OP1_COS, HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */ + HLSL_OP1_COUNTBITS, + HLSL_OP1_CTZ, HLSL_OP1_DSX, HLSL_OP1_DSX_COARSE, HLSL_OP1_DSX_FINE, @@ -729,6 +732,7 @@ enum hlsl_ir_expr_op HLSL_OP1_EXP2, HLSL_OP1_F16TOF32, HLSL_OP1_F32TOF16, + HLSL_OP1_FIND_MSB, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, HLSL_OP1_ISINF, @@ -1791,10 +1795,10 @@ bool hlsl_type_is_integer(const struct hlsl_type *type); bool hlsl_type_is_floating_point(const struct hlsl_type *type); bool hlsl_type_is_row_major(const struct hlsl_type *type); bool hlsl_type_is_signed_integer(const struct hlsl_type *type); +bool hlsl_type_is_unsigned_integer(const struct hlsl_type *type); unsigned int hlsl_type_minor_size(const struct hlsl_type *type); unsigned int hlsl_type_major_size(const struct hlsl_type *type); unsigned int hlsl_type_element_count(const struct hlsl_type *type); -bool hlsl_type_is_integer(const struct hlsl_type *type); bool hlsl_type_is_minimum_precision(const struct hlsl_type *type); bool hlsl_type_is_resource(const struct hlsl_type *type); bool hlsl_type_is_shader(const struct hlsl_type *type); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index d83ad9fe7d8..d3004d7cc8a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -3181,6 +3181,20 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, return true; } +static bool elementwise_intrinsic_int_convert_args(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_INT, type->e.numeric.dimx, type->e.numeric.dimy); + + convert_args(ctx, params, type, loc); + return true; +} + static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3579,6 +3593,20 @@ static bool intrinsic_cosh(struct hlsl_ctx *ctx, return write_cosh_or_sinh(ctx, params, loc, false); } +static bool intrinsic_countbits(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *type; + + if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) + return false; + type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_COUNTBITS, operands, type, loc); +} + static bool intrinsic_cross(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3925,6 +3953,76 @@ static bool intrinsic_f32tof16(struct hlsl_ctx *ctx, return add_expr(ctx, params->instrs, HLSL_OP1_F32TOF16, operands, type, loc); } +static bool intrinsic_firstbithigh(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *type = params->args[0]->data_type; + struct hlsl_ir_node *c, *clz, *eq, *xor; + struct hlsl_constant_value v; + + if (hlsl_version_lt(ctx, 4, 0)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The 'firstbithigh' intrinsic requires shader model 4.0 or higher."); + + if (hlsl_type_is_unsigned_integer(type)) + { + if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) + return false; + } + else + { + if (!elementwise_intrinsic_int_convert_args(ctx, params, loc)) + return false; + } + type = convert_numeric_type(ctx, type, HLSL_TYPE_UINT); + + operands[0] = params->args[0]; + if (hlsl_version_lt(ctx, 5, 0)) + return add_expr(ctx, params->instrs, HLSL_OP1_FIND_MSB, operands, type, loc); + + v.u[0].u = 0x1f; + if (!(c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &v, loc))) + return false; + hlsl_block_add_instr(params->instrs, c); + + if (!(clz = add_expr(ctx, params->instrs, HLSL_OP1_CLZ, operands, type, loc))) + return false; + if (!(xor = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_BIT_XOR, c, clz, loc))) + return false; + + v.u[0].i = -1; + if (!(c = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &v, loc))) + return false; + hlsl_block_add_instr(params->instrs, c); + + if (!(eq = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_EQUAL, clz, c, loc))) + return false; + + operands[0] = eq; + operands[1] = add_implicit_conversion(ctx, params->instrs, c, type, loc); + operands[2] = xor; + return add_expr(ctx, params->instrs, HLSL_OP3_TERNARY, operands, type, loc); +} + +static bool intrinsic_firstbitlow(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *type; + + if (hlsl_version_lt(ctx, 4, 0)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "The 'firstbitlow' intrinsic requires shader model 4.0 or higher."); + + if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) + return false; + type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_UINT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_CTZ, operands, type, loc); +} + static bool intrinsic_floor(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -5317,6 +5415,7 @@ intrinsic_functions[] = {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, {"cosh", 1, true, intrinsic_cosh}, + {"countbits", 1, true, intrinsic_countbits}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, @@ -5334,6 +5433,8 @@ intrinsic_functions[] = {"f16tof32", 1, true, intrinsic_f16tof32}, {"f32tof16", 1, true, intrinsic_f32tof16}, {"faceforward", 3, true, intrinsic_faceforward}, + {"firstbithigh", 1, true, intrinsic_firstbithigh}, + {"firstbitlow", 1, true, intrinsic_firstbitlow}, {"floor", 1, true, intrinsic_floor}, {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index bb8fdb8bd60..2138c3776a7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -11057,11 +11057,32 @@ static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx, generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_ROUND_PI, 0, 0, true); return true; + case HLSL_OP1_CLZ: + VKD3D_ASSERT(hlsl_type_is_integer(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + if (hlsl_type_is_signed_integer(src_type)) + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_SHI, 0, 0, true); + else + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_HI, 0, 0, true); + return true; + case HLSL_OP1_COS: VKD3D_ASSERT(type_is_float(dst_type)); sm4_generate_vsir_expr_with_two_destinations(ctx, program, VSIR_OP_SINCOS, expr, 1); return true; + case HLSL_OP1_COUNTBITS: + VKD3D_ASSERT(hlsl_type_is_integer(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_COUNTBITS, 0, 0, true); + return true; + + case HLSL_OP1_CTZ: + VKD3D_ASSERT(hlsl_type_is_integer(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0)); + generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_FIRSTBIT_LO, 0, 0, true); + return true; + case HLSL_OP1_DSX: VKD3D_ASSERT(type_is_float(dst_type)); generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VSIR_OP_DSX, 0, 0, true); @@ -14097,6 +14118,102 @@ static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc hlsl_transform_ir(ctx, resolve_loops, block, NULL); } +static bool lower_countbits(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *call, *rhs; + struct hlsl_ir_expr *expr; + struct hlsl_ir_var *lhs; + char *body; + + /* Like vkd3d_popcount(). */ + static const char template[] = + "typedef uint%u uintX;\n" + "uintX countbits(uintX v)\n" + "{\n" + " v -= (v >> 1) & 0x55555555;\n" + " v = (v & 0x33333333) + ((v >> 2) & 0x33333333);\n" + " return (((v + (v >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24;\n" + "}\n"; + + if (node->type != HLSL_IR_EXPR) + return false; + + expr = hlsl_ir_expr(node); + if (expr->op != HLSL_OP1_COUNTBITS) + return false; + + rhs = expr->operands[0].node; + if (!(body = hlsl_sprintf_alloc(ctx, template, hlsl_type_component_count(rhs->data_type)))) + return false; + func = hlsl_compile_internal_function(ctx, "countbits", body); + vkd3d_free(body); + if (!func) + return false; + + lhs = func->parameters.vars[0]; + hlsl_block_add_simple_store(ctx, block, lhs, rhs); + + if (!(call = hlsl_new_call(ctx, func, &node->loc))) + return false; + hlsl_block_add_instr(block, call); + + hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); + + return true; +} + +static bool lower_ctz(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *call, *rhs; + struct hlsl_ir_expr *expr; + struct hlsl_ir_var *lhs; + char *body; + + /* ctz() returns the bit number of the least significant 1-bit. + * Bit numbers count from the least significant bit. */ + static const char template[] = + "typedef uint%u uintX;\n" + "uintX ctz(uintX v)\n" + "{\n" + " uintX c = 31;\n" + " v &= -v;\n" + " c = (v & 0x0000ffff) ? c - 16 : c;\n" + " c = (v & 0x00ff00ff) ? c - 8 : c;\n" + " c = (v & 0x0f0f0f0f) ? c - 4 : c;\n" + " c = (v & 0x33333333) ? c - 2 : c;\n" + " c = (v & 0x55555555) ? c - 1 : c;\n" + " return v ? c : -1;\n" + "}\n"; + + if (node->type != HLSL_IR_EXPR) + return false; + + expr = hlsl_ir_expr(node); + if (expr->op != HLSL_OP1_CTZ) + return false; + + rhs = expr->operands[0].node; + if (!(body = hlsl_sprintf_alloc(ctx, template, hlsl_type_component_count(rhs->data_type)))) + return false; + func = hlsl_compile_internal_function(ctx, "ctz", body); + vkd3d_free(body); + if (!func) + return false; + + lhs = func->parameters.vars[0]; + hlsl_block_add_simple_store(ctx, block, lhs, rhs); + + if (!(call = hlsl_new_call(ctx, func, &node->loc))) + return false; + hlsl_block_add_instr(block, call); + + hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); + + return true; +} + static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) { struct hlsl_ir_function_decl *func; @@ -14239,6 +14356,69 @@ static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, stru return true; } +static bool lower_find_msb(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_ir_node *call, *rhs; + struct hlsl_ir_expr *expr; + struct hlsl_ir_var *lhs; + char *body; + + /* For positive numbers, find_msb() returns the bit number of the most + * significant 1-bit. For negative numbers, it returns the bit number of + * the most significant 0-bit. Bit numbers count from the least + * significant bit. */ + static const char template[] = + "typedef %s intX;\n" + "uint%u find_msb(intX v)\n" + "{\n" + " intX c, mask;\n" + " v = v < 0 ? ~v : v;\n" + " mask = v & 0xffff0000;\n" + " v = mask ? mask : v;\n" + " c = mask ? 16 : v ? 0 : -1;\n" + " mask = v & 0xff00ff00;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 8 : c;\n" + " mask = v & 0xf0f0f0f0;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 4 : c;\n" + " mask = v & 0xcccccccc;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 2 : c;\n" + " mask = v & 0xaaaaaaaa;\n" + " v = mask ? mask : v;\n" + " c = mask ? c + 1 : c;\n" + " return c;\n" + "}\n"; + + if (node->type != HLSL_IR_EXPR) + return false; + + expr = hlsl_ir_expr(node); + if (expr->op != HLSL_OP1_FIND_MSB) + return false; + + rhs = expr->operands[0].node; + if (!(body = hlsl_sprintf_alloc(ctx, template, rhs->data_type->name, hlsl_type_component_count(rhs->data_type)))) + return false; + func = hlsl_compile_internal_function(ctx, "find_msb", body); + vkd3d_free(body); + if (!func) + return false; + + lhs = func->parameters.vars[0]; + hlsl_block_add_simple_store(ctx, block, lhs, rhs); + + if (!(call = hlsl_new_call(ctx, func, &node->loc))) + return false; + hlsl_block_add_instr(block, call); + + hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc); + + return true; +} + static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) { struct hlsl_ir_function_decl *func; @@ -14355,8 +14535,11 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct list *semantic_v if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) { + lower_ir(ctx, lower_countbits, body); + lower_ir(ctx, lower_ctz, body); lower_ir(ctx, lower_f16tof32, body); lower_ir(ctx, lower_f32tof16, body); + lower_ir(ctx, lower_find_msb, body); } lower_ir(ctx, lower_isinf, body); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 252ed51a4e4..7b3b0470d5d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -250,6 +250,35 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; } +static bool fold_clz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = src->node.data_type->e.numeric.type; + unsigned int k, v; + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_INT: + v = src->value.u[k].i < 0 ? ~src->value.u[k].u : src->value.u[k].u; + break; + + case HLSL_TYPE_UINT: + v = src->value.u[k].u; + break; + + default: + FIXME("Fold 'clz' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + + dst->u[k].u = v ? vkd3d_log2i(v) ^ 0x1f : ~0u; + } + + return true; +} + static bool fold_cos(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { @@ -280,6 +309,59 @@ static bool fold_cos(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; } +static bool fold_countbits(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_UINT: + dst->u[k].u = vkd3d_popcount(src->value.u[k].u); + break; + + default: + FIXME("Fold 'countbits' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + +static bool fold_ctz(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, + const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) +{ + enum hlsl_base_type type = dst_type->e.numeric.type; + unsigned int k; + + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); + + for (k = 0; k < dst_type->e.numeric.dimx; ++k) + { + switch (type) + { + case HLSL_TYPE_UINT: + if (!src->value.u[k].u) + dst->u[k].u = ~0u; + else + dst->u[k].u = vkd3d_ctz(src->value.u[k].u); + break; + + default: + FIXME("Fold 'ctz' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src) { @@ -1403,10 +1485,22 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_ceil(ctx, &res, instr->data_type, arg1); break; + case HLSL_OP1_CLZ: + success = fold_clz(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_COS: success = fold_cos(ctx, &res, instr->data_type, arg1); break; + case HLSL_OP1_COUNTBITS: + success = fold_countbits(ctx, &res, instr->data_type, arg1); + break; + + case HLSL_OP1_CTZ: + success = fold_ctz(ctx, &res, instr->data_type, arg1); + break; + case HLSL_OP1_EXP2: success = fold_exp2(ctx, &res, instr->data_type, arg1); break; @@ -1555,6 +1649,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { + static const struct hlsl_constant_value zero; struct hlsl_ir_constant *const_arg = NULL; struct hlsl_ir_node *mut_arg = NULL; struct hlsl_ir_node *res_node; @@ -1617,6 +1712,17 @@ bool hlsl_fold_constant_identities(struct hlsl_ctx *ctx, struct hlsl_ir_node *in res_node = &const_arg->node; break; + case HLSL_OP2_LESS: + /* x < 0 -> false, if x is unsigned. */ + if (!hlsl_type_is_unsigned_integer(expr->operands[0].node->data_type) + || expr->operands[1].node->type != HLSL_IR_CONSTANT + || !hlsl_constant_is_zero(hlsl_ir_constant(expr->operands[1].node))) + break; + if (!(res_node = hlsl_new_constant(ctx, instr->data_type, &zero, &instr->loc))) + break; + list_add_before(&expr->node.entry, &res_node->entry); + break; + default: break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index a242f32d824..d765b5acccd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -9885,6 +9885,37 @@ static void vsir_validate_io_register(struct validation_context *ctx, const stru reg->idx[control_point_index].offset, control_point_count, reg->type); } +static void vsir_validate_texture_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) +{ + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + uint32_t idx; + + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "TEXTURE registers cannot be used in shader type %#x.", version->type); + + if (reg->idx_count != 1) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, + "Invalid index count %u for a TEXTURE register.", reg->idx_count); + return; + } + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL relative address for a TEXTURE register."); + + if (version->major >= 3) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "TEXTURE registers cannot be used in version %u.%u.", version->major, version->minor); + + idx = reg->idx[0].offset; + if (idx >= 8 || (vkd3d_shader_ver_le(version, 1, 4) && idx >= 6) + || (vkd3d_shader_ver_le(version, 1, 3) && idx >= 4)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Register t%u exceeds limits for version %u.%u.", idx, version->major, version->minor); +} + static void vsir_validate_temp_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) { @@ -10354,6 +10385,10 @@ static void vsir_validate_register(struct validation_context *ctx, vsir_validate_register_without_indices(ctx, reg); break; + case VKD3DSPR_TEXTURE: + vsir_validate_texture_register(ctx, reg); + break; + case VKD3DSPR_SSA: vsir_validate_ssa_register(ctx, reg); break; @@ -10402,6 +10437,8 @@ static void vsir_validate_io_dst_param(struct validation_context *ctx, static void vsir_validate_dst_param(struct validation_context *ctx, const struct vkd3d_shader_dst_param *dst) { + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + vsir_validate_register(ctx, &dst->reg); if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL) @@ -10531,6 +10568,11 @@ static void vsir_validate_dst_param(struct validation_context *ctx, vsir_validate_io_dst_param(ctx, dst); break; + case VKD3DSPR_TEXTURE: + if (vkd3d_shader_ver_ge(version, 1, 4)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "Texture registers cannot be written in version %u.%u.", version->major, version->minor); + default: break; } @@ -11511,6 +11553,27 @@ static void vsir_validate_shift_operation(struct validation_context *ctx, vsir_opcode_get_name(instruction->opcode, ""), instruction->opcode); } +static void vsir_validate_bem(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) +{ + const struct vkd3d_shader_version *version = &ctx->program->shader_version; + + if (version->type != VKD3D_SHADER_TYPE_PIXEL) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, + "BEM cannot be used in shader type %#x.", version->type); + + if (version->major != 1 || version->minor != 4) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_OPCODE, + "BEM cannot be used in version %u.%u.", version->major, version->minor); + + if (instruction->dst[0].write_mask != 0x3) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, + "Invalid BEM write mask %#x.", instruction->dst[0].write_mask); + + /* Strictly not an elementwise operation, but we expect all the arguments + * to be float. */ + vsir_validate_float_elementwise_operation(ctx, instruction); +} + static void vsir_validate_branch(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction) { size_t i; @@ -12353,6 +12416,7 @@ static const struct vsir_validator_instruction_desc vsir_validator_instructions[ [VSIR_OP_AND] = {1, 2, vsir_validate_logic_elementwise_operation}, [VSIR_OP_ASIN] = {1, 1, vsir_validate_float_elementwise_operation}, [VSIR_OP_ATAN] = {1, 1, vsir_validate_float_elementwise_operation}, + [VSIR_OP_BEM] = {1, 2, vsir_validate_bem}, [VSIR_OP_BRANCH] = {0, ~0u, vsir_validate_branch}, [VSIR_OP_DADD] = {1, 2, vsir_validate_double_elementwise_operation}, [VSIR_OP_DCL_GS_INSTANCES] = {0, 0, vsir_validate_dcl_gs_instances}, diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 0fdeba75c58..434be05e786 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -3254,6 +3254,14 @@ static struct spirv_compiler *spirv_compiler_create(struct vsir_program *program compiler->features = option->value; break; + case VKD3D_SHADER_COMPILE_OPTION_PACK_MATRIX_ORDER: + case VKD3D_SHADER_COMPILE_OPTION_BACKWARD_COMPATIBILITY: + case VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT: + case VKD3D_SHADER_COMPILE_OPTION_WARN_IMPLICIT_TRUNCATION: + case VKD3D_SHADER_COMPILE_OPTION_INCLUDE_EMPTY_BUFFERS_IN_EFFECTS: + /* Explicitly ignored for this target. */ + break; + default: WARN("Ignoring unrecognised option %#x with value %#x.\n", option->name, option->value); break; @@ -8347,8 +8355,8 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, unsigned int i, j; instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); - type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); - scalar_type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(builder, VSIR_DATA_F32, 2); + scalar_type_id = spirv_get_type_id(builder, VSIR_DATA_U32, 1); zero_id = spirv_compiler_get_constant_float(compiler, 0.0f); /* FIXME: Consider a single PackHalf2x16 instruction per 2 components. */ @@ -8427,7 +8435,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co src0_id = spirv_compiler_emit_load_src(compiler, &src[0], write_mask); src1_id = spirv_compiler_emit_load_src(compiler, &src[1], write_mask); - type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + type_id = spirv_get_type_id(builder, VSIR_DATA_BOOL, component_count); result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); @@ -8479,7 +8487,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); - type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); + type_id = spirv_get_type_id(builder, VSIR_DATA_BOOL, component_count); result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); @@ -9054,7 +9062,7 @@ static void spirv_compiler_emit_lod(struct spirv_compiler *compiler, spirv_compiler_prepare_image(compiler, &image, &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); - type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_FLOAT, 2); + type_id = spirv_get_type_id(builder, VSIR_DATA_F32, 2); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); val_id = vkd3d_spirv_build_op_image_query_lod(builder, type_id, image.sampled_image_id, coordinate_id); @@ -9388,7 +9396,7 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, if (!spirv_compiler_get_register_info(compiler, &resource->reg, ®_info)) return; - type_id = spirv_get_type_id_for_component_type(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = spirv_get_type_id(builder, VSIR_DATA_U32, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 7ff2a305cfa..8f2d3dd48ca 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -4248,6 +4248,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VSIR_OP_BREAK: case VSIR_OP_CASE: case VSIR_OP_CONTINUE: + case VSIR_OP_COUNTBITS: case VSIR_OP_CUT: case VSIR_OP_CUT_STREAM: case VSIR_OP_DCL_STREAM: @@ -4273,6 +4274,9 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_ case VSIR_OP_EXP: case VSIR_OP_F16TOF32: case VSIR_OP_F32TOF16: + case VSIR_OP_FIRSTBIT_HI: + case VSIR_OP_FIRSTBIT_LO: + case VSIR_OP_FIRSTBIT_SHI: case VSIR_OP_FRC: case VSIR_OP_FTOI: case VSIR_OP_FTOU: -- 2.51.0