From 86412218c44000e79015064e3efe32a3783dab6b Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Fri, 4 Apr 2025 07:59:53 +1100 Subject: [PATCH] Updated vkd3d to f576ecc9929dd98c900bb8bc0335b91a1a0d3bff. --- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 32 +- libs/vkd3d/libs/vkd3d-shader/fx.c | 4 +- libs/vkd3d/libs/vkd3d-shader/glsl.c | 3 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 38 + libs/vkd3d/libs/vkd3d-shader/hlsl.h | 3 + libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 738 ++++++++++++++++-- libs/vkd3d/libs/vkd3d-shader/ir.c | 187 ++++- libs/vkd3d/libs/vkd3d-shader/msl.c | 1 + libs/vkd3d/libs/vkd3d-shader/spirv.c | 1 + .../libs/vkd3d-shader/vkd3d_shader_main.c | 12 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 4 + 11 files changed, 911 insertions(+), 112 deletions(-) diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 58e35cf22e8..b49ef9865db 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -1759,27 +1759,40 @@ static bool is_inconsequential_instr(const struct vkd3d_shader_instruction *ins) static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_dst_param *reg) { + uint32_t offset = reg->reg.idx_count ? reg->reg.idx[0].offset : 0; + VKD3D_ASSERT(reg->write_mask); put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER | sm1_encode_register_type(®->reg) | (reg->modifiers << VKD3D_SM1_DST_MODIFIER_SHIFT) | (reg->write_mask << VKD3D_SM1_WRITEMASK_SHIFT) - | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); + | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); } static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, const struct vkd3d_shader_src_param *reg) { + uint32_t address_mode = VKD3D_SM1_ADDRESS_MODE_ABSOLUTE, offset = 0; + + if (reg->reg.idx_count) + { + offset = reg->reg.idx[0].offset; + if (reg->reg.idx[0].rel_addr) + address_mode = VKD3D_SM1_ADDRESS_MODE_RELATIVE; + } + put_u32(buffer, VKD3D_SM1_INSTRUCTION_PARAMETER | sm1_encode_register_type(®->reg) + | (address_mode << VKD3D_SM1_ADDRESS_MODE_SHIFT) | (reg->modifiers << VKD3D_SM1_SRC_MODIFIER_SHIFT) | (swizzle_from_vsir(reg->swizzle) << VKD3D_SM1_SWIZZLE_SHIFT) - | (reg->reg.idx[0].offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); + | (offset & VKD3D_SM1_REGISTER_NUMBER_MASK)); } static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct vkd3d_shader_instruction *ins) { const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + const struct vkd3d_shader_src_param *src; const struct vkd3d_sm1_opcode_info *info; unsigned int i; uint32_t token; @@ -1810,13 +1823,10 @@ static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct v for (i = 0; i < ins->src_count; ++i) { - if (ins->src[i].reg.idx[0].rel_addr) - { - vkd3d_shader_error(d3dbc->message_context, &ins->location, VKD3D_SHADER_ERROR_D3DBC_NOT_IMPLEMENTED, - "Unhandled relative addressing on source register."); - d3dbc->failed = true; - } - write_sm1_src_register(buffer, &ins->src[i]); + src = &ins->src[i]; + write_sm1_src_register(buffer, src); + if (src->reg.idx_count && src->reg.idx[0].rel_addr) + write_sm1_src_register(buffer, src->reg.idx[0].rel_addr); } }; @@ -1831,6 +1841,7 @@ static void d3dbc_write_vsir_def(struct d3dbc_compiler *d3dbc, const struct vkd3 .reg.type = VKD3DSPR_CONST, .write_mask = VKD3DSP_WRITEMASK_ALL, .reg.idx[0].offset = ins->dst[0].reg.idx[0].offset, + .reg.idx_count = 1, }; token = 
VKD3D_SM1_OP_DEF; @@ -1863,6 +1874,7 @@ static void d3dbc_write_vsir_sampler_dcl(struct d3dbc_compiler *d3dbc, reg.reg.type = VKD3DSPR_COMBINED_SAMPLER; reg.write_mask = VKD3DSP_WRITEMASK_ALL; reg.reg.idx[0].offset = reg_id; + reg.reg.idx_count = 1; write_sm1_dst_register(buffer, ®); } @@ -1938,6 +1950,7 @@ static void d3dbc_write_vsir_instruction(struct d3dbc_compiler *d3dbc, const str case VKD3DSIH_MAX: case VKD3DSIH_MIN: case VKD3DSIH_MOV: + case VKD3DSIH_MOVA: case VKD3DSIH_MUL: case VKD3DSIH_SINCOS: case VKD3DSIH_SLT: @@ -1982,6 +1995,7 @@ static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, uint32_t token, usage_idx; bool ret; + reg.reg.idx_count = 1; if (sm1_register_from_semantic_name(version, element->semantic_name, element->semantic_index, output, ®.reg.type, ®.reg.idx[0].offset)) { diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 7a226c1c870..debcb261811 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -2366,6 +2366,7 @@ static inline bool is_object_fx_type(enum state_property_component_type type) case FX_BLEND: case FX_VERTEXSHADER: case FX_PIXELSHADER: + case FX_GEOMETRYSHADER: return true; default: return false; @@ -2761,7 +2762,8 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl struct hlsl_ir_constant *c = hlsl_ir_constant(node); struct hlsl_type *data_type = c->node.data_type; - if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT) + if (data_type->class == HLSL_CLASS_SCALAR + && (data_type->e.numeric.type == HLSL_TYPE_INT || data_type->e.numeric.type == HLSL_TYPE_UINT)) { if (c->value.u[0].u != 0) hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index 828a94d77ab..a87ade5e467 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -1296,7 +1296,7 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, "Internal compiler error: Unhandled SV_POSITION index %u.", idx); if (version->type == VKD3D_SHADER_TYPE_PIXEL) - vkd3d_string_buffer_printf(buffer, "gl_FragCoord"); + vkd3d_string_buffer_printf(buffer, "vec4(gl_FragCoord.xyz, 1.0 / gl_FragCoord.w)"); else vkd3d_string_buffer_printf(buffer, "gl_Position"); break; @@ -2468,6 +2468,7 @@ int glsl_compile(struct vsir_program *program, uint64_t config_flags, return ret; VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); + VKD3D_ASSERT(program->has_descriptor_info); vkd3d_glsl_generator_init(&generator, program, compile_info, combined_sampler_info, message_context); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 01586592b25..d1d20b7384c 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -1588,6 +1588,43 @@ void hlsl_block_add_simple_store(struct hlsl_ctx *ctx, struct hlsl_block *block, hlsl_block_add_store_index(ctx, block, &lhs_deref, NULL, rhs, 0, &rhs->loc); } +static struct hlsl_ir_node *hlsl_new_store_parent(struct hlsl_ctx *ctx, + const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, + unsigned int writemask, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_store *store; + + VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); + VKD3D_ASSERT(lhs->path_len >= path_len); + + if (!(store = 
hlsl_alloc(ctx, sizeof(*store)))) + return NULL; + init_node(&store->node, HLSL_IR_STORE, NULL, loc); + + if (!hlsl_init_deref(ctx, &store->lhs, lhs->var, path_len)) + { + vkd3d_free(store); + return NULL; + } + for (unsigned int i = 0; i < path_len; ++i) + hlsl_src_from_node(&store->lhs.path[i], lhs->path[i].node); + + hlsl_src_from_node(&store->rhs, rhs); + + if (!writemask && type_is_single_reg(rhs->data_type)) + writemask = (1 << rhs->data_type->e.numeric.dimx) - 1; + store->writemask = writemask; + + return &store->node; +} + +void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, + unsigned int writemask, const struct vkd3d_shader_location *loc) +{ + append_new_instr(ctx, block, hlsl_new_store_parent(ctx, lhs, path_len, rhs, writemask, loc)); +} + void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, unsigned int comp, struct hlsl_ir_node *rhs) { @@ -4957,6 +4994,7 @@ int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d if (target_type == VKD3D_SHADER_TARGET_SPIRV_BINARY || target_type == VKD3D_SHADER_TARGET_SPIRV_TEXT + || target_type == VKD3D_SHADER_TARGET_GLSL || target_type == VKD3D_SHADER_TARGET_D3D_ASM) { uint64_t config_flags = vkd3d_shader_init_config_flags(); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 1d78c5622de..fafa5740963 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -1565,6 +1565,9 @@ void hlsl_block_add_store_component(struct hlsl_ctx *ctx, struct hlsl_block *blo void hlsl_block_add_store_index(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *lhs, struct hlsl_ir_node *idx, struct hlsl_ir_node *rhs, unsigned int writemask, const struct vkd3d_shader_location *loc); +void hlsl_block_add_store_parent(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct hlsl_deref *lhs, unsigned int path_len, struct hlsl_ir_node *rhs, + unsigned int writemask, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_swizzle(struct hlsl_ctx *ctx, struct hlsl_block *block, uint32_t s, unsigned int width, struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_block_add_uint_constant(struct hlsl_ctx *ctx, struct hlsl_block *block, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 8fcf6e6ac54..ba56ba90403 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -1916,12 +1916,6 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, if (!nonconst_index_from_deref(ctx, deref, &nonconst_i, &base, &scale, &count)) return false; - if (hlsl_version_lt(ctx, 4, 0)) - { - TRACE("Non-constant index propagation is not yet supported for SM1.\n"); - return false; - } - VKD3D_ASSERT(count); hlsl_block_init(&block); @@ -1950,6 +1944,12 @@ static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx, else if (x != idx->src.var) goto done; + if (hlsl_version_lt(ctx, 4, 0) && x->is_uniform && ctx->profile->type != VKD3D_SHADER_TYPE_VERTEX) + { + TRACE("Skipping propagating non-constant deref to SM1 uniform %s.\n", var->name); + goto done; + } + if (i == 0) { path_len = idx->src.path_len; @@ -2184,6 +2184,9 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, return false; 
VKD3D_ASSERT(value->component == 0); + /* A uniform object should have never been written to. */ + VKD3D_ASSERT(!deref->var->is_uniform); + /* Only HLSL_IR_LOAD can produce an object. */ load = hlsl_ir_load(value->node); @@ -2488,6 +2491,554 @@ enum validation_result DEREF_VALIDATION_NOT_CONSTANT, }; +struct vectorize_exprs_state +{ + struct vectorizable_exprs_group + { + struct hlsl_block *block; + struct hlsl_ir_expr *exprs[4]; + uint8_t expr_count, component_count; + } *groups; + size_t count, capacity; +}; + +static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b) +{ + /* TODO: We can also vectorize different constants. */ + + if (a->type == HLSL_IR_SWIZZLE) + a = hlsl_ir_swizzle(a)->val.node; + if (b->type == HLSL_IR_SWIZZLE) + b = hlsl_ir_swizzle(b)->val.node; + + return a == b; +} + +static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b) +{ + if (a->op != b->op) + return false; + + for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j) + { + if (!a->operands[j].node) + break; + if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node)) + return false; + } + + return true; +} + +static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state) +{ + if (expr->node.data_type->class > HLSL_CLASS_VECTOR) + return; + + /* These are the only current ops that are not per-component. */ + if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED + || expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD) + return; + + for (size_t i = 0; i < state->count; ++i) + { + struct vectorizable_exprs_group *group = &state->groups[i]; + struct hlsl_ir_expr *other = group->exprs[0]; + + /* These are SSA instructions, which means they have the same value + * regardless of what block they're in. However, being in different + * blocks may mean that one expression or the other is not always + * executed. 
*/ + + if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4 + && group->block == block + && is_same_vectorizable_expr(expr, other)) + { + group->exprs[group->expr_count++] = expr; + group->component_count += expr->node.data_type->e.numeric.dimx; + return; + } + } + + if (!hlsl_array_reserve(ctx, (void **)&state->groups, + &state->capacity, state->count + 1, sizeof(*state->groups))) + return; + state->groups[state->count].block = block; + state->groups[state->count].exprs[0] = expr; + state->groups[state->count].expr_count = 1; + state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx; + ++state->count; +} + +static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct vectorize_exprs_state *state) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_EXPR) + { + record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state); + } + else if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + find_vectorizable_expr_groups(ctx, &iff->then_block, state); + find_vectorizable_expr_groups(ctx, &iff->else_block, state); + } + else if (instr->type == HLSL_IR_LOOP) + { + find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state); + } + else if (instr->type == HLSL_IR_SWITCH) + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + find_vectorizable_expr_groups(ctx, &c->body, state); + } + } +} + +/* Combine sequences like + * + * 3: @1.x + * 4: @2.x + * 5: @3 * @4 + * 6: @1.y + * 7: @2.x + * 8: @6 * @7 + * + * into + * + * 5_1: @1.xy + * 5_2: @2.xx + * 5_3: @5_1 * @5_2 + * 5: @5_3.x + * 8: @5_3.y + * + * Each operand to an expression needs to refer to the same ultimate source + * (in this case @1 and @2 respectively), but can be a swizzle thereof. + * + * In practice the swizzles @5 and @8 can generally then be vectorized again, + * either as part of another expression, or as part of a store. + */ +static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + struct vectorize_exprs_state state = {0}; + bool progress = false; + + find_vectorizable_expr_groups(ctx, block, &state); + + for (unsigned int i = 0; i < state.count; ++i) + { + struct vectorizable_exprs_group *group = &state.groups[i]; + struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0}; + uint32_t swizzles[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *arg, *combined; + unsigned int component_count = 0; + struct hlsl_type *combined_type; + struct hlsl_block new_block; + struct hlsl_ir_expr *expr; + + if (group->expr_count == 1) + continue; + + hlsl_block_init(&new_block); + + for (unsigned int j = 0; j < group->expr_count; ++j) + { + expr = group->exprs[j]; + + for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) + { + uint32_t arg_swizzle; + + if (!(arg = expr->operands[a].node)) + break; + + if (arg->type == HLSL_IR_SWIZZLE) + arg_swizzle = hlsl_ir_swizzle(arg)->u.vector; + else + arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W); + + /* Mask out the invalid components. 
*/ + arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1; + swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count); + } + + component_count += expr->node.data_type->e.numeric.dimx; + } + + expr = group->exprs[0]; + for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a) + { + if (!(arg = expr->operands[a].node)) + break; + if (arg->type == HLSL_IR_SWIZZLE) + arg = hlsl_ir_swizzle(arg)->val.node; + args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc); + } + + combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count); + combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc); + + list_move_before(&expr->node.entry, &new_block.instrs); + + TRACE("Combining %u %s instructions into %p.\n", group->expr_count, + debug_hlsl_expr_op(group->exprs[0]->op), combined); + + component_count = 0; + for (unsigned int j = 0; j < group->expr_count; ++j) + { + struct hlsl_ir_node *replacement; + + expr = group->exprs[j]; + + if (!(replacement = hlsl_new_swizzle(ctx, + HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count), + expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc))) + goto out; + component_count += expr->node.data_type->e.numeric.dimx; + list_add_before(&expr->node.entry, &replacement->entry); + hlsl_replace_node(&expr->node, replacement); + } + + progress = true; + } + +out: + vkd3d_free(state.groups); + return progress; +} + +struct vectorize_stores_state +{ + struct vectorizable_stores_group + { + struct hlsl_block *block; + /* We handle overlapping stores, because it's not really easier not to. + * In theory, then, we could collect an arbitrary number of stores here. + * + * In practice, overlapping stores are unlikely, and of course at most + * 4 stores can appear without overlap. Therefore, for simplicity, we + * just use a fixed array of 4. + * + * Since computing the writemask requires traversing the deref, and we + * need to do that anyway, we store it here for convenience. */ + struct hlsl_ir_store *stores[4]; + unsigned int path_len; + uint8_t writemasks[4]; + uint8_t store_count; + bool dirty; + } *groups; + size_t count, capacity; +}; + +/* This must be a store to a subsection of a vector. + * In theory we can also vectorize stores to packed struct fields, + * but this requires target-specific knowledge and is probably best left + * to a VSIR pass. */ +static bool can_vectorize_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store, + unsigned int *path_len, uint8_t *writemask) +{ + struct hlsl_type *type = store->lhs.var->data_type; + unsigned int i; + + if (store->rhs.node->data_type->class > HLSL_CLASS_VECTOR) + return false; + + if (type->class == HLSL_CLASS_SCALAR) + return false; + + for (i = 0; type->class != HLSL_CLASS_VECTOR && i < store->lhs.path_len; ++i) + type = hlsl_get_element_type_from_path_index(ctx, type, store->lhs.path[i].node); + + if (type->class != HLSL_CLASS_VECTOR) + return false; + + *path_len = i; + + if (i < store->lhs.path_len) + { + struct hlsl_ir_constant *c; + + /* This is a store to a scalar component of a vector, achieved via + * indexing. 
*/ + + if (store->lhs.path[i].node->type != HLSL_IR_CONSTANT) + return false; + c = hlsl_ir_constant(store->lhs.path[i].node); + *writemask = (1u << c->value.u[0].u); + } + else + { + *writemask = store->writemask; + } + + return true; +} + +static bool derefs_are_same_vector(struct hlsl_ctx *ctx, const struct hlsl_deref *a, const struct hlsl_deref *b) +{ + struct hlsl_type *type = a->var->data_type; + + if (a->var != b->var) + return false; + + for (unsigned int i = 0; type->class != HLSL_CLASS_VECTOR && i < a->path_len && i < b->path_len; ++i) + { + if (a->path[i].node != b->path[i].node) + return false; + type = hlsl_get_element_type_from_path_index(ctx, type, a->path[i].node); + } + + return true; +} + +static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_ir_store *store, struct vectorize_stores_state *state) +{ + unsigned int path_len; + uint8_t writemask; + + if (!can_vectorize_store(ctx, store, &path_len, &writemask)) + { + /* In the case of a dynamically indexed vector, we must invalidate + * any groups that statically index the same vector. + * For the sake of expediency, we go one step further and invalidate + * any groups that store to the same variable. + * (We also don't check that that was the reason why this store isn't + * vectorizable.) + * We could be more granular, but we'll defer that until it comes + * up in practice. */ + for (size_t i = 0; i < state->count; ++i) + { + if (state->groups[i].stores[0]->lhs.var == store->lhs.var) + state->groups[i].dirty = true; + } + return; + } + + for (size_t i = 0; i < state->count; ++i) + { + struct vectorizable_stores_group *group = &state->groups[i]; + struct hlsl_ir_store *other = group->stores[0]; + + if (group->dirty) + continue; + + if (derefs_are_same_vector(ctx, &store->lhs, &other->lhs)) + { + /* Stores must be in the same CFG block. If they're not, + * they're not executed in exactly the same flow, and + * therefore can't be vectorized. */ + if (group->block == block + && is_same_vectorizable_source(store->rhs.node, other->rhs.node)) + { + if (group->store_count < ARRAY_SIZE(group->stores)) + { + group->stores[group->store_count] = store; + group->writemasks[group->store_count] = writemask; + ++group->store_count; + return; + } + } + else + { + /* A store to the same vector with a different source, or in + * a different CFG block, invalidates any earlier store. + * + * A store to a component which *contains* the vector in + * question would also invalidate, but we should have split all + * of those by the time we get here. */ + group->dirty = true; + + /* Note that we do exit this loop early if we find a store A we + * can vectorize with, but that's fine. If there was a store B + * also in the state that we can't vectorize with, it would + * already have invalidated A. */ + } + } + else + { + /* This could still be a store to the same vector, if e.g. the + * vector is part of a dynamically indexed array, or the path has + * two equivalent instructions which refer to the same component. + * [CSE may help with the latter, but we don't have it yet, + * and we shouldn't depend on it anyway.] + * For the sake of expediency, we just invalidate it if it refers + * to the same variable at all. + * As above, we could be more granular, but we'll defer that until + * it comes up in practice. */ + if (store->lhs.var == other->lhs.var) + group->dirty = true; + + /* As above, we don't need to worry about exiting the loop early. 
*/ + } + } + + if (!hlsl_array_reserve(ctx, (void **)&state->groups, + &state->capacity, state->count + 1, sizeof(*state->groups))) + return; + state->groups[state->count].block = block; + state->groups[state->count].stores[0] = store; + state->groups[state->count].path_len = path_len; + state->groups[state->count].writemasks[0] = writemask; + state->groups[state->count].store_count = 1; + state->groups[state->count].dirty = false; + ++state->count; +} + +static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct vectorize_stores_state *state) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_STORE) + { + record_vectorizable_store(ctx, block, hlsl_ir_store(instr), state); + } + else if (instr->type == HLSL_IR_LOAD) + { + struct hlsl_ir_var *var = hlsl_ir_load(instr)->src.var; + + /* By vectorizing store A with store B, we are effectively moving + * store A down to happen at the same time as store B. + * If there was a load of the same variable between the two, this + * would be incorrect. + * Therefore invalidate all stores to this variable. As above, we + * could be more granular if necessary. */ + + for (unsigned int i = 0; i < state->count; ++i) + { + if (state->groups[i].stores[0]->lhs.var == var) + state->groups[i].dirty = true; + } + } + else if (instr->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + find_vectorizable_store_groups(ctx, &iff->then_block, state); + find_vectorizable_store_groups(ctx, &iff->else_block, state); + } + else if (instr->type == HLSL_IR_LOOP) + { + find_vectorizable_store_groups(ctx, &hlsl_ir_loop(instr)->body, state); + } + else if (instr->type == HLSL_IR_SWITCH) + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + find_vectorizable_store_groups(ctx, &c->body, state); + } + } +} + +/* Combine sequences like + * + * 2: @1.yw + * 3: @1.zy + * 4: var.xy = @2 + * 5: var.yw = @3 + * + * to + * + * 2: @1.yzy + * 5: var.xyw = @2 + * + * There are a lot of gotchas here. We need to make sure the two stores are to + * the same vector (which may be embedded in a complex variable), that they're + * always executed in the same control flow, and that there aren't any other + * stores or loads on the same vector in the middle. */ +static bool vectorize_stores(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + struct vectorize_stores_state state = {0}; + bool progress = false; + + find_vectorizable_store_groups(ctx, block, &state); + + for (unsigned int i = 0; i < state.count; ++i) + { + struct vectorizable_stores_group *group = &state.groups[i]; + uint32_t new_swizzle = 0, new_writemask = 0; + struct hlsl_ir_node *new_rhs, *value; + uint32_t swizzle_components[4]; + unsigned int component_count; + struct hlsl_ir_store *store; + struct hlsl_block new_block; + + if (group->store_count == 1) + continue; + + hlsl_block_init(&new_block); + + /* Compute the swizzle components. 
*/ + for (unsigned int j = 0; j < group->store_count; ++j) + { + unsigned int writemask = group->writemasks[j]; + uint32_t rhs_swizzle; + + store = group->stores[j]; + + if (store->rhs.node->type == HLSL_IR_SWIZZLE) + rhs_swizzle = hlsl_ir_swizzle(store->rhs.node)->u.vector; + else + rhs_swizzle = HLSL_SWIZZLE(X, Y, Z, W); + + component_count = 0; + for (unsigned int k = 0; k < 4; ++k) + { + if (writemask & (1u << k)) + swizzle_components[k] = hlsl_swizzle_get_component(rhs_swizzle, component_count++); + } + + new_writemask |= writemask; + } + + /* Construct the new swizzle. */ + component_count = 0; + for (unsigned int k = 0; k < 4; ++k) + { + if (new_writemask & (1u << k)) + hlsl_swizzle_set_component(&new_swizzle, component_count++, swizzle_components[k]); + } + + store = group->stores[0]; + value = store->rhs.node; + if (value->type == HLSL_IR_SWIZZLE) + value = hlsl_ir_swizzle(value)->val.node; + + new_rhs = hlsl_block_add_swizzle(ctx, &new_block, new_swizzle, component_count, value, &value->loc); + hlsl_block_add_store_parent(ctx, &new_block, &store->lhs, + group->path_len, new_rhs, new_writemask, &store->node.loc); + + TRACE("Combining %u stores to %s.\n", group->store_count, store->lhs.var->name); + + list_move_before(&group->stores[group->store_count - 1]->node.entry, &new_block.instrs); + + for (unsigned int j = 0; j < group->store_count; ++j) + { + list_remove(&group->stores[j]->node.entry); + hlsl_free_instr(&group->stores[j]->node); + } + + progress = true; + } + + vkd3d_free(state.groups); + return progress; +} + static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) { @@ -3123,6 +3674,11 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc return false; } +static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const struct hlsl_deref *deref) +{ + return ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && deref->var->is_uniform; +} + /* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant * index into multiple constant loads, where the value of only one of them ends up in the resulting * node. 
@@ -3149,6 +3705,9 @@ static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_n if (deref->path_len == 0) return false; + if (deref_supports_sm1_indirect_addressing(ctx, deref)) + return false; + for (i = deref->path_len - 1; ; --i) { if (deref->path[i].node->type != HLSL_IR_CONSTANT) @@ -7839,7 +8398,8 @@ static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, st if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref)) return false; - src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); + if (src_param->reg.dimension != VSIR_DIMENSION_NONE) + src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); return true; } @@ -7869,7 +8429,6 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_constant *constant) { struct hlsl_ir_node *instr = &constant->node; - struct vkd3d_shader_dst_param *dst_param; struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_instruction *ins; @@ -7881,13 +8440,11 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx, src_param = &ins->src[0]; vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->reg.idx[0].offset = constant->reg.id; src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask); - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - dst_param->reg.idx[0].offset = instr->reg.id; - dst_param->write_mask = instr->reg.writemask; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); } static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx, @@ -7974,11 +8531,13 @@ static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx dst_param = &ins->dst[0]; vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); dst_param->reg.idx[0].offset = instr->reg.id; + dst_param->reg.dimension = VSIR_DIMENSION_VEC4; dst_param->write_mask = 1u << i; src_param = &ins->src[0]; vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); src_param->reg.idx[0].offset = operand->reg.id; + src_param->reg.dimension = VSIR_DIMENSION_VEC4; c = vsir_swizzle_get_component(src_swizzle, i); src_param->swizzle = vsir_swizzle_from_writemask(1u << c); } @@ -7990,7 +8549,6 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi { struct hlsl_ir_node *operand = expr->operands[0].node; struct hlsl_ir_node *instr = &expr->node; - struct vkd3d_shader_dst_param *dst_param; struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_instruction *ins; unsigned int src_count = 0; @@ -8001,25 +8559,20 @@ static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsi if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count))) return; - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - dst_param->reg.idx[0].offset = instr->reg.id; - dst_param->write_mask = instr->reg.writemask; - - src_param = &ins->src[0]; - vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - src_param->reg.idx[0].offset = operand->reg.id; - src_param->swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, VKD3DSP_WRITEMASK_ALL); + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); + vsir_src_from_hlsl_node(&ins->src[0], 
ctx, operand, VKD3DSP_WRITEMASK_ALL); if (ctx->profile->major_version < 3) { src_param = &ins->src[1]; vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id; src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; src_param = &ins->src[2]; vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id; src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE; } @@ -8341,19 +8894,68 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, else VKD3D_ASSERT(reg.allocated); - vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); + if (type == VKD3DSPR_DEPTHOUT) + { + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0); + dst_param->reg.dimension = VSIR_DIMENSION_SCALAR; + } + else + { + vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1); + dst_param->reg.idx[0].offset = register_index; + dst_param->reg.dimension = VSIR_DIMENSION_VEC4; + } dst_param->write_mask = writemask; - dst_param->reg.idx[0].offset = register_index; if (deref->rel_offset.node) hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir."); } +static void sm1_generate_vsir_instr_mova(struct hlsl_ctx *ctx, + struct vsir_program *program, struct hlsl_ir_node *instr) +{ + enum vkd3d_shader_opcode opcode = hlsl_version_ge(ctx, 2, 0) ? VKD3DSIH_MOVA : VKD3DSIH_MOV; + struct vkd3d_shader_dst_param *dst_param; + struct vkd3d_shader_instruction *ins; + + VKD3D_ASSERT(instr->reg.allocated); + + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1))) + return; + + dst_param = &ins->dst[0]; + vsir_register_init(&dst_param->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); + dst_param->write_mask = VKD3DSP_WRITEMASK_0; + + VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR); + VKD3D_ASSERT(instr->data_type->e.numeric.dimx == 1); + vsir_src_from_hlsl_node(&ins->src[0], ctx, instr, VKD3DSP_WRITEMASK_ALL); +} + +static struct vkd3d_shader_src_param *sm1_generate_vsir_new_address_src(struct hlsl_ctx *ctx, + struct vsir_program *program) +{ + struct vkd3d_shader_src_param *idx_src; + + if (!(idx_src = vsir_program_get_src_params(program, 1))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return NULL; + } + + memset(idx_src, 0, sizeof(*idx_src)); + vsir_register_init(&idx_src->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0); + idx_src->reg.dimension = VSIR_DIMENSION_VEC4; + idx_src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + return idx_src; +} + static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, - struct vkd3d_shader_src_param *src_param, struct hlsl_deref *deref, - unsigned int dst_writemask, const struct vkd3d_shader_location *loc) + struct vsir_program *program, struct vkd3d_shader_src_param *src_param, + struct hlsl_deref *deref, uint32_t dst_writemask, const struct vkd3d_shader_location *loc) { enum vkd3d_shader_register_type type = VKD3DSPR_TEMP; + struct vkd3d_shader_src_param *src_rel_addr = NULL; struct vkd3d_shader_version version; uint32_t register_index; unsigned int writemask; @@ -8371,12 +8973,26 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, } else if (deref->var->is_uniform) { + unsigned int offset = deref->const_offset; + type = VKD3DSPR_CONST; + register_index = deref->var->regs[HLSL_REGSET_NUMERIC].id + offset / 4; - reg = 
hlsl_reg_from_deref(ctx, deref); - register_index = reg.id; - writemask = reg.writemask; - VKD3D_ASSERT(reg.allocated); + writemask = 0xf & (0xf << (offset % 4)); + if (deref->var->regs[HLSL_REGSET_NUMERIC].writemask) + writemask = hlsl_combine_writemasks(deref->var->regs[HLSL_REGSET_NUMERIC].writemask, writemask); + + if (deref->rel_offset.node) + { + VKD3D_ASSERT(deref_supports_sm1_indirect_addressing(ctx, deref)); + + if (!(src_rel_addr = sm1_generate_vsir_new_address_src(ctx, program))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + } + VKD3D_ASSERT(deref->var->regs[HLSL_REGSET_NUMERIC].allocated); } else if (deref->var->is_input_semantic) { @@ -8408,32 +9024,30 @@ static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, } vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1); + src_param->reg.dimension = VSIR_DIMENSION_VEC4; src_param->reg.idx[0].offset = register_index; + src_param->reg.idx[0].rel_addr = src_rel_addr; src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask); - - if (deref->rel_offset.node) - hlsl_fixme(ctx, loc, "Translate relative addressing on src register for vsir."); } static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load) { struct hlsl_ir_node *instr = &load->node; - struct vkd3d_shader_dst_param *dst_param; struct vkd3d_shader_instruction *ins; VKD3D_ASSERT(instr->reg.allocated); + if (load->src.rel_offset.node) + sm1_generate_vsir_instr_mova(ctx, program, load->src.rel_offset.node); + if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) return; - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - dst_param->reg.idx[0].offset = instr->reg.id; - dst_param->write_mask = instr->reg.writemask; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); - sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[0], &load->src, dst_param->write_mask, - &ins->location); + sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[0], + &load->src, ins->dst[0].write_mask, &ins->location); } static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, @@ -8443,7 +9057,6 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, struct hlsl_ir_node *ddx = load->ddx.node; struct hlsl_ir_node *ddy = load->ddy.node; struct hlsl_ir_node *instr = &load->node; - struct vkd3d_shader_dst_param *dst_param; struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_instruction *ins; enum vkd3d_shader_opcode opcode; @@ -8482,15 +9095,12 @@ static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx, return; ins->flags = flags; - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - dst_param->reg.idx[0].offset = instr->reg.id; - dst_param->write_mask = instr->reg.writemask; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); src_param = &ins->src[0]; vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL); - sm1_generate_vsir_init_src_param_from_deref(ctx, &ins->src[1], &load->resource, + sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], &load->resource, VKD3DSP_WRITEMASK_ALL, &ins->location); if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD) @@ -8507,7 +9117,6 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr) { struct hlsl_ir_node *instr = 
&swizzle_instr->node, *val = swizzle_instr->val.node; - struct vkd3d_shader_dst_param *dst_param; struct vkd3d_shader_src_param *src_param; struct vkd3d_shader_instruction *ins; uint32_t swizzle; @@ -8517,11 +9126,7 @@ static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx, if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1))) return; - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1); - dst_param->reg.idx[0].offset = instr->reg.id; - dst_param->reg.dimension = VSIR_DIMENSION_VEC4; - dst_param->write_mask = instr->reg.writemask; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr); swizzle = hlsl_swizzle_from_writemask(val->reg.writemask); swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx); @@ -8557,7 +9162,6 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, { struct hlsl_ir_node *condition = jump->condition.node; struct hlsl_ir_node *instr = &jump->node; - struct vkd3d_shader_dst_param *dst_param; struct vkd3d_shader_instruction *ins; if (jump->type == HLSL_IR_JUMP_DISCARD_NEG) @@ -8565,10 +9169,7 @@ static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx, if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 1, 0))) return; - dst_param = &ins->dst[0]; - vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); - dst_param->reg.idx[0].offset = condition->reg.id; - dst_param->write_mask = condition->reg.writemask; + vsir_dst_from_hlsl_node(&ins->dst[0], ctx, condition); } else { @@ -8689,6 +9290,10 @@ static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl return; } + program->temp_count = allocate_temp_registers(ctx, entry_func); + if (ctx->result) + return; + generate_vsir_signature(ctx, program, entry_func); hlsl_block_init(&block); @@ -12532,6 +13137,7 @@ static void process_entry_function(struct hlsl_ctx *ctx, struct recursive_call_ctx recursive_call_ctx; struct hlsl_ir_var *var; unsigned int i; + bool progress; ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func; @@ -12709,6 +13315,9 @@ static void process_entry_function(struct hlsl_ctx *ctx, hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL); } + compute_liveness(ctx, entry_func); + transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); + loop_unrolling_execute(ctx, body); hlsl_run_const_passes(ctx, body); @@ -12719,13 +13328,21 @@ static void process_entry_function(struct hlsl_ctx *ctx, lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); - compute_liveness(ctx, entry_func); - transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body); - if (hlsl_version_lt(ctx, 4, 0)) hlsl_transform_ir(ctx, lower_separate_samples, body, NULL); hlsl_transform_ir(ctx, validate_dereferences, body, NULL); + + do + { + progress = vectorize_exprs(ctx, body); + compute_liveness(ctx, entry_func); + progress |= hlsl_transform_ir(ctx, dce, body, NULL); + progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL); + progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL); + progress |= vectorize_stores(ctx, body); + } while (progress); + hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); if (hlsl_version_ge(ctx, 4, 0)) @@ -12847,7 +13464,6 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry if 
(profile->major_version < 4) { mark_indexable_vars(ctx, entry_func); - allocate_temp_registers(ctx, entry_func); allocate_const_registers(ctx, entry_func); sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS); allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS); diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index 3a784c71388..72cf53761e4 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -470,6 +470,80 @@ static bool get_opcode_from_rel_op(enum vkd3d_shader_rel_op rel_op, enum vkd3d_d return false; } +static enum vkd3d_result vsir_program_normalize_addr(struct vsir_program *program, + struct vsir_transformation_context *ctx) +{ + struct vkd3d_shader_instruction *ins, *ins2; + unsigned int tmp_idx = ~0u; + unsigned int i, k, r; + + for (i = 0; i < program->instructions.count; ++i) + { + ins = &program->instructions.elements[i]; + + if (ins->opcode == VKD3DSIH_MOV && ins->dst[0].reg.type == VKD3DSPR_ADDR) + { + if (tmp_idx == ~0u) + tmp_idx = program->temp_count++; + + ins->opcode = VKD3DSIH_FTOU; + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins->dst[0].reg.idx[0].offset = tmp_idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + } + else if (ins->opcode == VKD3DSIH_MOVA) + { + if (tmp_idx == ~0u) + tmp_idx = program->temp_count++; + + if (!shader_instruction_array_insert_at(&program->instructions, i + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = &program->instructions.elements[i]; + ins2 = &program->instructions.elements[i + 1]; + + ins->opcode = VKD3DSIH_ROUND_NE; + vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = tmp_idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + + if (!vsir_instruction_init_with_params(program, ins2, &ins->location, VKD3DSIH_FTOU, 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ins2->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + ins2->dst[0].reg.idx[0].offset = tmp_idx; + ins2->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins2->dst[0].write_mask = ins->dst[0].write_mask; + + vsir_register_init(&ins2->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + ins2->src[0].reg.idx[0].offset = tmp_idx; + ins2->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins2->src[0].swizzle = vsir_swizzle_from_writemask(ins2->dst[0].write_mask); + } + + for (k = 0; k < ins->src_count; ++k) + { + struct vkd3d_shader_src_param *src = &ins->src[k]; + + for (r = 0; r < src->reg.idx_count; ++r) + { + struct vkd3d_shader_src_param *rel = src->reg.idx[r].rel_addr; + + if (rel && rel->reg.type == VKD3DSPR_ADDR) + { + if (tmp_idx == ~0u) + tmp_idx = program->temp_count++; + + vsir_register_init(&rel->reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); + rel->reg.idx[0].offset = tmp_idx; + rel->reg.dimension = VSIR_DIMENSION_VEC4; + } + } + } + } + + return VKD3D_OK; +} + static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, struct vkd3d_shader_instruction *ifc, unsigned int *tmp_idx, struct vkd3d_shader_message_context *message_context) @@ -482,6 +556,7 @@ static enum vkd3d_result vsir_program_lower_ifc(struct vsir_program *program, if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; + ifc = &instructions->elements[pos]; if (*tmp_idx == ~0u) *tmp_idx = program->temp_count++; @@ -535,6 +610,7 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program if (!shader_instruction_array_insert_at(instructions, pos 
+ 1, components_read + 1)) return VKD3D_ERROR_OUT_OF_MEMORY; + texkill = &instructions->elements[pos]; if (*tmp_idx == ~0u) *tmp_idx = program->temp_count++; @@ -621,6 +697,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) return VKD3D_ERROR_OUT_OF_MEMORY; + mad = &instructions->elements[pos]; if (*tmp_idx == ~0u) *tmp_idx = program->temp_count++; @@ -665,6 +742,7 @@ static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *prog if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) return VKD3D_ERROR_OUT_OF_MEMORY; + sincos = &instructions->elements[pos]; ins = &instructions->elements[pos + 1]; @@ -717,6 +795,7 @@ static enum vkd3d_result vsir_program_lower_texldp(struct vsir_program *program, if (!shader_instruction_array_insert_at(instructions, pos + 1, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; + tex = &instructions->elements[pos]; if (*tmp_idx == ~0u) *tmp_idx = program->temp_count++; @@ -1128,6 +1207,7 @@ static enum vkd3d_result vsir_program_ensure_diffuse(struct vsir_program *progra if (!shader_instruction_array_insert_at(&program->instructions, i, 1)) return VKD3D_ERROR_OUT_OF_MEMORY; ins = &program->instructions.elements[i]; + vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); vsir_dst_param_init(&ins->dst[0], VKD3DSPR_ATTROUT, VKD3D_DATA_FLOAT, 1); ins->dst[0].reg.idx[0].offset = 0; @@ -1346,7 +1426,6 @@ static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program loc = ins->location; if (!shader_instruction_array_insert_at(&program->instructions, i, uninit_varying_count)) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = &program->instructions.elements[i]; for (unsigned int j = signature->element_count - uninit_varying_count; j < signature->element_count; ++j) @@ -2426,7 +2505,8 @@ struct flat_constants_normaliser }; static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg, - enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index) + enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index, + struct vkd3d_shader_src_param **rel_addr) { static const struct { @@ -2446,12 +2526,8 @@ static bool get_flat_constant_register_type(const struct vkd3d_shader_register * { if (reg->type == regs[i].type) { - if (reg->idx[0].rel_addr) - { - FIXME("Unhandled relative address.\n"); - return false; - } - + if (rel_addr) + *rel_addr = reg->idx[0].rel_addr; *set = regs[i].set; *index = reg->idx[0].offset; return true; @@ -2465,10 +2541,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par const struct flat_constants_normaliser *normaliser) { enum vkd3d_shader_d3dbc_constant_register set; + struct vkd3d_shader_src_param *rel_addr; uint32_t index; size_t i, j; - if (!get_flat_constant_register_type(¶m->reg, &set, &index)) + if (!get_flat_constant_register_type(¶m->reg, &set, &index, &rel_addr)) return; for (i = 0; i < normaliser->def_count; ++i) @@ -2486,8 +2563,11 @@ static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_par param->reg.type = VKD3DSPR_CONSTBUFFER; param->reg.idx[0].offset = set; /* register ID */ + param->reg.idx[0].rel_addr = NULL; param->reg.idx[1].offset = set; /* register index */ + param->reg.idx[1].rel_addr = NULL; param->reg.idx[2].offset = index; /* buffer index */ + param->reg.idx[2].rel_addr = rel_addr; param->reg.idx_count = 3; } @@ -2514,7 +2594,7 @@ static enum vkd3d_result 
vsir_program_normalise_flat_constants(struct vsir_progr def = &normaliser.defs[normaliser.def_count++]; - get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index); + get_flat_constant_register_type(&ins->dst[0].reg, &def->set, &def->index, NULL); for (j = 0; j < 4; ++j) def->value[j] = ins->src[0].reg.u.immconst_u32[j]; @@ -6037,6 +6117,7 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr uint32_t colour_temp, size_t *ret_pos, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_instruction_array *instructions = &program->instructions; + const struct vkd3d_shader_location loc = ret->location; static const struct vkd3d_shader_location no_loc; size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins; @@ -6061,9 +6142,10 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr { if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) return VKD3D_ERROR_OUT_OF_MEMORY; + ret = NULL; ins = &program->instructions.elements[pos]; - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1); ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; src_param_init_const_uint(&ins->src[0], 0); @@ -6073,20 +6155,20 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) return VKD3D_ERROR_OUT_OF_MEMORY; - + ret = NULL; ins = &program->instructions.elements[pos]; switch (ref->data_type) { case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: - vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2); + vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].float_opcode, 1, 2); src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); break; case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: - vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); + vsir_instruction_init_with_params(program, ins, &loc, opcodes[compare_func].uint_opcode, 1, 2); src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); @@ -6107,14 +6189,14 @@ static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *progr ins->src[opcodes[compare_func].swap ? 
1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); ++ins; - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DISCARD, 0, 1); ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; src_param_init_ssa_bool(&ins->src[0], program->ssa_count); ++program->ssa_count; ++ins; - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); ins->dst[0].reg.idx[0].offset = colour_signature_idx; ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; @@ -6215,13 +6297,14 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog uint32_t position_temp, uint32_t low_signature_idx, uint32_t high_signature_idx, size_t *ret_pos) { struct vkd3d_shader_instruction_array *instructions = &program->instructions; + const struct vkd3d_shader_location loc = ret->location; size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins; unsigned int output_idx = 0; if (!shader_instruction_array_insert_at(&program->instructions, pos, vkd3d_popcount(mask) + 1)) return VKD3D_ERROR_OUT_OF_MEMORY; - + ret = NULL; ins = &program->instructions.elements[pos]; for (unsigned int i = 0; i < 8; ++i) @@ -6229,7 +6312,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog if (!(mask & (1u << i))) continue; - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DP4, 1, 2); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_DP4, 1, 2); src_param_init_temp_float4(&ins->src[0], position_temp); src_param_init_parameter(&ins->src[1], VKD3D_SHADER_PARAMETER_NAME_CLIP_PLANE_0 + i, VKD3D_DATA_FLOAT); ins->src[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; @@ -6247,7 +6330,7 @@ static enum vkd3d_result insert_clip_planes_before_ret(struct vsir_program *prog ++ins; } - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); ins->dst[0].reg.idx[0].offset = position_signature_idx; ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; @@ -6404,15 +6487,16 @@ static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *progr const struct vkd3d_shader_instruction *ret, size_t *ret_pos) { struct vkd3d_shader_instruction_array *instructions = &program->instructions; + const struct vkd3d_shader_location loc = ret->location; size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins; if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) return VKD3D_ERROR_OUT_OF_MEMORY; - + ret = NULL; ins = &program->instructions.elements[pos]; - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); vsir_dst_param_init(&ins->dst[0], VKD3DSPR_RASTOUT, VKD3D_DATA_FLOAT, 1); ins->dst[0].reg.idx[0].offset = VSIR_RASTOUT_POINT_SIZE; src_param_init_parameter(&ins->src[0], VKD3D_SHADER_PARAMETER_NAME_POINT_SIZE, VKD3D_DATA_FLOAT); @@ -6541,9 +6625,9 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra if (!shader_instruction_array_insert_at(&program->instructions, i + 1, !!min_parameter + !!max_parameter)) return VKD3D_ERROR_OUT_OF_MEMORY; + ins = 
&program->instructions.elements[i + 1]; loc = &program->instructions.elements[i].location; - ins = &program->instructions.elements[i + 1]; if (min_parameter) { @@ -6741,7 +6825,6 @@ static enum vkd3d_result vsir_program_insert_point_coord(struct vsir_program *pr { if (!shader_instruction_array_insert_at(&program->instructions, insert_pos, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; - ins = &program->instructions.elements[insert_pos]; vsir_instruction_init_with_params(program, ins, &no_loc, VKD3DSIH_MOV, 1, 1); @@ -6815,6 +6898,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro */ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) return VKD3D_ERROR_OUT_OF_MEMORY; + ret = NULL; + *ret_pos = pos + 4; ssa_temp = program->ssa_count++; @@ -6845,6 +6930,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro */ if (!shader_instruction_array_insert_at(&program->instructions, pos, 4)) return VKD3D_ERROR_OUT_OF_MEMORY; + ret = NULL; + *ret_pos = pos + 4; ssa_temp = program->ssa_count++; @@ -6875,6 +6962,8 @@ static enum vkd3d_result insert_fragment_fog_before_ret(struct vsir_program *pro */ if (!shader_instruction_array_insert_at(&program->instructions, pos, 5)) return VKD3D_ERROR_OUT_OF_MEMORY; + ret = NULL; + *ret_pos = pos + 5; ssa_temp = program->ssa_count++; @@ -7053,16 +7142,18 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr { const struct signature_element *e = &program->output_signature.elements[source_signature_idx]; struct vkd3d_shader_instruction_array *instructions = &program->instructions; + const struct vkd3d_shader_location loc = ret->location; size_t pos = ret - instructions->elements; struct vkd3d_shader_instruction *ins; if (!shader_instruction_array_insert_at(&program->instructions, pos, 2)) return VKD3D_ERROR_OUT_OF_MEMORY; + ret = NULL; ins = &program->instructions.elements[pos]; /* Write the fog output. */ - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); dst_param_init_output(&ins->dst[0], VKD3D_DATA_FLOAT, fog_signature_idx, 0x1); src_param_init_temp_float4(&ins->src[0], temp); if (source == VKD3D_SHADER_FOG_SOURCE_Z) @@ -7072,7 +7163,7 @@ static enum vkd3d_result insert_vertex_fog_before_ret(struct vsir_program *progr ++ins; /* Write the position or specular output. 
*/ - vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_instruction_init_with_params(program, ins, &loc, VKD3DSIH_MOV, 1, 1); dst_param_init_output(&ins->dst[0], vkd3d_data_type_from_component_type(e->component_type), source_signature_idx, e->mask); src_param_init_temp_float4(&ins->src[0], temp); @@ -7707,6 +7798,33 @@ static void vsir_validate_label_register(struct validation_context *ctx, reg->idx[0].offset, ctx->program->block_count); } +static void vsir_validate_descriptor_indices(struct validation_context *ctx, + const struct vkd3d_shader_register *reg, enum vkd3d_shader_descriptor_type type, const char *name) +{ + const struct vkd3d_shader_descriptor_info1 *descriptor; + + if (reg->idx[0].rel_addr) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Non-NULL indirect address for the ID of a register of type \"%s\".", name); + + if (!ctx->program->has_descriptor_info) + return; + + if (!(descriptor = vkd3d_shader_find_descriptor(&ctx->program->descriptors, type, reg->idx[0].offset))) + { + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "No matching descriptor found for register %s%u.", name, reg->idx[0].offset); + return; + } + + if (!reg->idx[1].rel_addr && (reg->idx[1].offset < descriptor->register_index + || reg->idx[1].offset - descriptor->register_index >= descriptor->count)) + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Register index %u doesn't belong to the range [%u, %u] for register %s%u.", + reg->idx[1].offset, descriptor->register_index, + descriptor->register_index + descriptor->count - 1, name, reg->idx[0].offset); +} + static void vsir_validate_constbuffer_register(struct validation_context *ctx, const struct vkd3d_shader_register *reg) { @@ -7725,9 +7843,7 @@ static void vsir_validate_constbuffer_register(struct validation_context *ctx, return; } - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, - "Non-NULL relative address for a CONSTBUFFER register ID."); + vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, "cb"); } static void vsir_validate_sampler_register(struct validation_context *ctx, @@ -7753,9 +7869,7 @@ static void vsir_validate_sampler_register(struct validation_context *ctx, return; } - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, - "Non-NULL relative address for the descriptor index of a SAMPLER register."); + vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, "s"); } static void vsir_validate_resource_register(struct validation_context *ctx, @@ -7780,9 +7894,7 @@ static void vsir_validate_resource_register(struct validation_context *ctx, return; } - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, - "Non-NULL relative address for the descriptor index of a RESOURCE register."); + vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, "t"); } static void vsir_validate_uav_register(struct validation_context *ctx, @@ -7812,9 +7924,7 @@ static void vsir_validate_uav_register(struct validation_context *ctx, return; } - if (reg->idx[0].rel_addr) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, - "Non-NULL relative address for the descriptor index of a UAV register."); + vsir_validate_descriptor_indices(ctx, reg, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, "u"); } static void vsir_validate_ssa_register(struct validation_context *ctx, @@ -9824,6 +9934,9 @@ enum vkd3d_result 
vsir_program_transform(struct vsir_program *program, uint64_t if (program->shader_version.major <= 2) vsir_transform(&ctx, vsir_program_ensure_diffuse); + if (program->shader_version.major < 4) + vsir_transform(&ctx, vsir_program_normalize_addr); + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) vsir_transform(&ctx, vsir_program_remap_output_signature); diff --git a/libs/vkd3d/libs/vkd3d-shader/msl.c b/libs/vkd3d/libs/vkd3d-shader/msl.c index 756b43298d3..a5d952cd525 100644 --- a/libs/vkd3d/libs/vkd3d-shader/msl.c +++ b/libs/vkd3d/libs/vkd3d-shader/msl.c @@ -1310,6 +1310,7 @@ int msl_compile(struct vsir_program *program, uint64_t config_flags, return ret; VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); + VKD3D_ASSERT(program->has_descriptor_info); if ((ret = msl_generator_init(&generator, program, compile_info, message_context)) < 0) return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 0cc1ceca798..91a6686eb0d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -11361,6 +11361,7 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, return ret; VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); + VKD3D_ASSERT(program->has_descriptor_info); if (!(spirv_compiler = spirv_compiler_create(program, compile_info, message_context, config_flags))) diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 2afeff086e5..9191429c439 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -1127,7 +1127,7 @@ static void vkd3d_shader_scan_combined_sampler_declaration( &semantic->resource.range, semantic->resource_type, VKD3D_SHADER_RESOURCE_DATA_FLOAT); } -static const struct vkd3d_shader_descriptor_info1 *find_descriptor( +const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( const struct vkd3d_shader_scan_descriptor_info1 *info, enum vkd3d_shader_descriptor_type type, unsigned int register_id) { @@ -1181,11 +1181,11 @@ static void vkd3d_shader_scan_combined_sampler_usage(struct vkd3d_shader_scan_co if (dynamic_resource || dynamic_sampler) return; - if ((d = find_descriptor(context->scan_descriptor_info, + if ((d = vkd3d_shader_find_descriptor(context->scan_descriptor_info, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource->idx[0].offset))) resource_space = d->register_space; - if (sampler && (d = find_descriptor(context->scan_descriptor_info, + if (sampler && (d = vkd3d_shader_find_descriptor(context->scan_descriptor_info, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler->idx[0].offset))) sampler_space = d->register_space; } @@ -1606,6 +1606,9 @@ static int vsir_program_scan(struct vsir_program *program, const struct vkd3d_sh vkd3d_shader_scan_context_init(&context, &program->shader_version, compile_info, add_descriptor_info ? 
&program->descriptors : NULL, combined_sampler_info, message_context); + if (add_descriptor_info) + program->has_descriptor_info = true; + if (TRACE_ON()) vsir_program_trace(program); @@ -2046,6 +2049,9 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_SPIRV_BINARY, #if defined(HAVE_SPIRV_TOOLS) || defined(VKD3D_SHADER_UNSUPPORTED_SPIRV_PARSER) VKD3D_SHADER_TARGET_SPIRV_TEXT, +#endif +#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL + VKD3D_SHADER_TARGET_GLSL, #endif VKD3D_SHADER_TARGET_D3D_ASM, VKD3D_SHADER_TARGET_D3D_BYTECODE, diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index e794257b9d8..bf794d5e936 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -1444,6 +1444,9 @@ struct vkd3d_shader_scan_descriptor_info1 unsigned int descriptor_count; }; +const struct vkd3d_shader_descriptor_info1 *vkd3d_shader_find_descriptor( + const struct vkd3d_shader_scan_descriptor_info1 *info, + enum vkd3d_shader_descriptor_type type, unsigned int register_id); void vkd3d_shader_free_scan_descriptor_info1(struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info); struct vsir_program @@ -1456,6 +1459,7 @@ struct vsir_program struct shader_signature patch_constant_signature; struct vkd3d_shader_scan_descriptor_info1 descriptors; + bool has_descriptor_info; unsigned int parameter_count; const struct vkd3d_shader_parameter1 *parameters; -- 2.47.2
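
Below the patch proper: a few self-contained sketches of the mechanisms the diff relies
on. None of this code is part of vkd3d; types and values marked as hypothetical are
invented for illustration.

First, the `ret = NULL;` assignments and the `loc` copies taken before the
vsir_instruction_init_with_params() calls above both guard against the same hazard:
shader_instruction_array_insert_at() may grow, and therefore move, the instruction
array, leaving `ret` dangling. A minimal sketch of that pattern, using hypothetical
stand-in types:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Hypothetical stand-ins for vkd3d's instruction array; illustrative only. */
struct instruction
{
    int location;
};

struct instruction_array
{
    struct instruction *elements;
    size_t count, capacity;
};

/* Grows the array like shader_instruction_array_insert_at(): the buffer may
 * be reallocated, which moves it and invalidates old element pointers. */
static int array_insert_at(struct instruction_array *array, size_t pos, size_t n)
{
    if (array->count + n > array->capacity)
    {
        size_t new_capacity = (array->count + n) * 2;
        struct instruction *p;

        if (!(p = realloc(array->elements, new_capacity * sizeof(*p))))
            return -1;
        array->elements = p; /* Any old pointer into the array now dangles. */
        array->capacity = new_capacity;
    }
    memmove(&array->elements[pos + n], &array->elements[pos],
            (array->count - pos) * sizeof(*array->elements));
    memset(&array->elements[pos], 0, n * sizeof(*array->elements));
    array->count += n;
    return 0;
}

int main(void)
{
    struct instruction_array array = {0};
    const struct instruction *ret;
    int loc;

    if (array_insert_at(&array, 0, 4) < 0)
        return 1;
    for (size_t i = 0; i < array.count; ++i)
        array.elements[i].location = (int)i;

    /* The pattern from the patch: copy what we need out of *ret first,
     * insert, then drop the now-untrustworthy pointer. */
    ret = &array.elements[3];
    loc = ret->location;
    if (array_insert_at(&array, 3, 2) < 0)
        return 1;
    ret = NULL;

    /* Use the cached copy instead of ret->location. */
    array.elements[3].location = loc;
    printf("%zu instructions, inserted at location %d\n", array.count, loc);
    free(array.elements);
    return 0;
}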
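
Second, the range check in the new vsir_validate_descriptor_indices() rejects a second
register index outside [register_index, register_index + count - 1] of the matched
descriptor. The same inclusive-range test in isolation, with invented values:

#include <stdbool.h>
#include <stdio.h>

/* Mirrors the bounds test added in vsir_validate_descriptor_indices():
 * register_index and count describe a declared descriptor range, as in
 * struct vkd3d_shader_descriptor_info1. */
static bool index_in_descriptor_range(unsigned int idx,
        unsigned int register_index, unsigned int count)
{
    /* Check the lower bound before subtracting, so the unsigned
     * subtraction cannot wrap around. */
    if (idx < register_index)
        return false;
    return idx - register_index < count;
}

int main(void)
{
    /* Invented example: a t3 declaration covering four registers,
     * i.e. t3..t6 are valid. */
    const unsigned int register_index = 3, count = 4;

    for (unsigned int idx = 0; idx < 9; ++idx)
        printf("t%u: %s\n", idx,
                index_in_descriptor_range(idx, register_index, count)
                ? "in range" : "out of range");
    return 0;
}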
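
Finally, the patch renames the static find_descriptor() to
vkd3d_shader_find_descriptor() and declares it in vkd3d_shader_private.h so the
validator can use it. Only the signature appears in the diff, but as used above it
amounts to locating the scanned descriptor matching a type and register ID; a
plausible self-contained reimplementation, with field names simplified and the
descriptor data invented:

#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for vkd3d's scan structures; the real ones live in
 * vkd3d_shader_private.h and carry many more fields. */
enum descriptor_type { TYPE_SRV, TYPE_UAV, TYPE_CBV, TYPE_SAMPLER };

struct descriptor_info
{
    enum descriptor_type type;
    unsigned int register_id;     /* The ID a register's idx[0] refers to. */
    unsigned int register_index;  /* First register of the declared range. */
    unsigned int count;           /* Number of registers in the range. */
};

struct scan_descriptor_info
{
    const struct descriptor_info *descriptors;
    size_t descriptor_count;
};

/* Plausible shape of the lookup: a linear scan for a matching type and ID. */
static const struct descriptor_info *find_descriptor(
        const struct scan_descriptor_info *info,
        enum descriptor_type type, unsigned int register_id)
{
    for (size_t i = 0; i < info->descriptor_count; ++i)
    {
        const struct descriptor_info *d = &info->descriptors[i];

        if (d->type == type && d->register_id == register_id)
            return d;
    }
    return NULL;
}

int main(void)
{
    /* Invented descriptor list: cb0 and a t2 range of three registers. */
    static const struct descriptor_info descriptors[] =
    {
        {TYPE_CBV, 0, 0, 1},
        {TYPE_SRV, 2, 2, 3},
    };
    const struct scan_descriptor_info info = {descriptors, 2};
    const struct descriptor_info *d;

    if ((d = find_descriptor(&info, TYPE_SRV, 2)))
        printf("t%u covers registers %u..%u\n", d->register_id,
                d->register_index, d->register_index + d->count - 1);
    return 0;
}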