diff --git a/libs/vkd3d-shader/ir.c b/libs/vkd3d-shader/ir.c index dd59aa4f1..12c37adbd 100644 --- a/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d-shader/ir.c @@ -3473,6 +3473,7 @@ struct io_normaliser struct shader_signature *input_signature; struct shader_signature *output_signature; struct shader_signature *patch_constant_signature; + struct vsir_normalisation_flags *normalisation_flags; enum vkd3d_shader_opcode phase; @@ -3952,7 +3953,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par vkd3d_unreachable(); e = &signature->elements[element_idx]; - if (vsir_signature_element_is_array(e)) + if (vsir_signature_element_is_array(e, normaliser->normalisation_flags)) id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); /* Replace the register index with the signature element index */ @@ -4034,7 +4035,7 @@ static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_par } e = &signature->elements[element_idx]; - if (vsir_signature_element_is_array(e)) + if (vsir_signature_element_is_array(e, normaliser->normalisation_flags)) id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index); reg->idx[id_idx].offset = element_idx; reg->idx_count = id_idx + 1; @@ -4089,6 +4090,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.input_signature = &program->input_signature; normaliser.output_signature = &program->output_signature; normaliser.patch_constant_signature = &program->patch_constant_signature; + normaliser.normalisation_flags = &program->normalisation_flags; for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) { @@ -8200,6 +8202,742 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr return VKD3D_OK; } +struct sysval_array_normaliser +{ + struct vsir_transformation_context *ctx; + + /* sysval semantic currently being normalised. 
*/ + enum vkd3d_shader_sysval_semantic sysval_semantic; + bool output; + + /* Registers used by the sysval elements of the original signature. */ + struct + { + unsigned int index; + unsigned int mask; + } regs[2]; + unsigned int reg_count; + + /* Index of the signature element created for the new array. */ + unsigned int element_idx; + /* Indexable temporary reserved to store a copy of the native sysval + * values for the current phase. If ~0u, the temporary has not been + * allocated for this phase yet. */ + unsigned int idxtemp_idx; + + enum vkd3d_shader_opcode phase; +}; + +static enum vkd3d_result sysval_array_normaliser_add_components( + struct sysval_array_normaliser *normaliser, unsigned int index, unsigned int mask) +{ + unsigned int q; + + for (q = 0; q < normaliser->reg_count; ++q) + { + if (index == normaliser->regs[q].index) + break; + } + + if (q == normaliser->reg_count) + { + if (normaliser->reg_count >= ARRAY_SIZE(normaliser->regs)) + { + vkd3d_shader_error(normaliser->ctx->message_context, + &normaliser->ctx->null_location, VKD3D_SHADER_ERROR_VSIR_INVALID_SIGNATURE, + "Sysval semantic %#x elements require more than %zu registers.\n", + normaliser->sysval_semantic, ARRAY_SIZE(normaliser->regs)); + return VKD3D_ERROR_INVALID_SHADER; + } + normaliser->reg_count += 1; + } + normaliser->regs[q].index = index; + normaliser->regs[q].mask |= mask; + + return VKD3D_OK; +} + +static enum vkd3d_result sysval_array_normaliser_init(struct vsir_transformation_context *ctx, + const char *semantic_name, enum vkd3d_shader_sysval_semantic sysval_semantic, + bool output, struct sysval_array_normaliser *normaliser) +{ + unsigned int component_count = 0, next_register_index; + struct shader_signature *signature; + struct signature_element *element; + enum vkd3d_result res; + + memset(normaliser, 0, sizeof(*normaliser)); + normaliser->ctx = ctx; + normaliser->sysval_semantic = sysval_semantic; + normaliser->output = output; + normaliser->element_idx = ~0u; + + 
normaliser->phase = VSIR_OP_INVALID; + + signature = output ? &ctx->program->output_signature : &ctx->program->input_signature; + + for (unsigned int i = 0; i < signature->element_count; ++i) + { + element = &signature->elements[i]; + if (element->sysval_semantic != sysval_semantic) + continue; + + for (unsigned int j = 0; j < element->register_count; ++j) + { + if ((res = sysval_array_normaliser_add_components(normaliser, + element->register_index + j, element->mask)) < 0) + return res; + } + } + + if (!normaliser->reg_count) + return VKD3D_OK; + + /* The scan above leaves 'element' on the last signature element, whatever + * its sysval. Step to the first element carrying this sysval, so that the + * new array inherits that element's interpolation mode. One always exists + * here, because registers are only recorded for matching elements. */ + for (unsigned int i = 0; i < signature->element_count; ++i) + { + element = &signature->elements[i]; + if (element->sysval_semantic == sysval_semantic) + break; + } + next_register_index = vsir_signature_next_location(signature); + if (!(element = add_signature_element(signature, semantic_name, next_register_index, + VKD3DSP_WRITEMASK_0, signature->element_count, element->interpolation_mode))) + return VKD3D_ERROR_OUT_OF_MEMORY; + element->sysval_semantic = sysval_semantic; + for (unsigned int q = 0; q < normaliser->reg_count; ++q) + { + component_count += vkd3d_popcount(normaliser->regs[q].mask); + } + element->register_count = component_count; + normaliser->element_idx = signature->element_count - 1; + + return VKD3D_OK; +} + +/* For every component 'k' that belongs to an output signature element that + * has the sysval currently being handled by the sysval_array_normaliser, add + * the following instruction before the return points of the program: + * + * mov o[k][e].x, x[idxtmp_idx][q].kkkk + * + * or in case this is the control point phase of a hull shader: + * + * mov o[k][P][e].x, x[idxtmp_idx][q].kkkk + * + * where: + * 'q' is the index of the register containing 'k' in the normaliser's + * internal list. + * '.kkkk' is the replicated swizzle that corresponds to component 'k'. + * 'e' is the new array's signature element index. + * 'idxtmp_idx' is the index of the indexable temp reserved by the + * normaliser. + * 'P' is the output control point ID. 
+ */ +static enum vkd3d_result sysval_array_normaliser_add_output_copy( + struct sysval_array_normaliser *normaliser, struct vsir_program_iterator *it) +{ + struct vsir_program *program = normaliser->ctx->program; + struct vkd3d_shader_src_param *outpointid_param = NULL; + unsigned int output_component_count = 0; + struct vkd3d_shader_instruction *mov; + struct signature_element *element; + struct vkd3d_shader_location loc; + + if (!normaliser->output) + return VKD3D_OK; + if (vsir_opcode_is_fork_or_join_phase(normaliser->phase)) + return VKD3D_OK; + if (normaliser->idxtemp_idx == ~0u) + return VKD3D_OK; + + element = &program->output_signature.elements[normaliser->element_idx]; + loc = vsir_program_iterator_current(it)->location; + + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL + && !(outpointid_param = vsir_program_create_outpointid_param(program))) + { + ERR("Failed to allocate outpointid param.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (unsigned int q = 0; q < normaliser->reg_count; ++q) + { + for (unsigned int k = 0; k < VKD3D_VEC4_SIZE; ++k) + { + struct vkd3d_shader_dst_param *dst; + struct vkd3d_shader_src_param *src; + + if (!(normaliser->regs[q].mask & (1u << k))) + continue; + + if (!(mov = vsir_program_iterator_insert_before_and_move(it, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!vsir_instruction_init_with_params(program, mov, &loc, VSIR_OP_MOV, 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst = &mov->dst[0]; + vsir_dst_param_init(dst, VKD3DSPR_OUTPUT, VSIR_DATA_F32, 2); + dst->reg.idx[0].offset = output_component_count++; + dst->reg.idx[1].offset = normaliser->element_idx; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = VKD3DSP_WRITEMASK_0; + if (outpointid_param) + { + dst->reg.idx_count = 3; + dst->reg.idx[2] = dst->reg.idx[1]; + dst->reg.idx[1].rel_addr = outpointid_param; + dst->reg.idx[1].offset = 0; + } + + src = &mov->src[0]; + vsir_src_param_init(src, VKD3DSPR_IDXTEMP, VSIR_DATA_F32, 2); + 
src->reg.idx[0].offset = normaliser->idxtemp_idx; + src->reg.idx[1].offset = q; + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = vsir_swizzle_from_writemask(1u << k); + + vsir_program_iterator_next(it); + } + } + VKD3D_ASSERT(output_component_count == element->register_count); + + return VKD3D_OK; +} + +/* For every component 'k' that belongs to an input signature element that has + * the sysval currently being handled by the sysval_array_normaliser, add the + * following single instruction at the beginning of the program: + * + * mov x[idxtmp_idx][q].k, v[k][e].x + * + * or in case there are multiple input control points, add multiple + * instructions, one for every one of them 'p': + * + * mov x[idxtmp_idx][p * reg_count + q].k, v[k][p][e].x + * + * where: + * 'q' is the index of the register containing 'k' in the normaliser's + * internal list. + * '.k' is the write mask that corresponds to component 'k' + * 'e' is the new array's signature element index. + * 'idxtmp_idx' is the index of the indexable temp reserved by the + * normaliser. + * 'reg_count' is the number of registers in the normaliser's internal + * list. + * + * NOTE: This function also does this for components 'k' that belong to an + * output signature in case the normaliser is handling an output semantic and + * this is the fork or join phase of a hull shader, where they can be used as + * source operands. Naturally, 'o' registers are used as source operands on + * such 'mov' instructions instead of 'v'. 
+ */ +static enum vkd3d_result sysval_array_normaliser_add_input_copy( + struct sysval_array_normaliser *normaliser, struct vsir_program_iterator *it) +{ + struct vsir_program *program = normaliser->ctx->program; + struct vkd3d_shader_instruction *mov; + struct signature_element *element; + unsigned int control_point_count; + struct vkd3d_shader_location loc; + + loc = vsir_program_iterator_current(it)->location; + if (normaliser->output) + { + control_point_count = program->output_control_point_count; + element = &program->output_signature.elements[normaliser->element_idx]; + } + else + { + control_point_count = program->input_control_point_count; + element = &program->input_signature.elements[normaliser->element_idx]; + } + + if (!vsir_program_iterator_insert_before_and_move(it, max(1, control_point_count) * element->register_count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (unsigned int p = 0; p < max(1, control_point_count); ++p) + { + unsigned int input_component_count = 0; + + for (unsigned int q = 0; q < normaliser->reg_count; ++q) + { + for (unsigned int k = 0; k < VKD3D_VEC4_SIZE; ++k) + { + struct vkd3d_shader_dst_param *dst; + struct vkd3d_shader_src_param *src; + + if (!(normaliser->regs[q].mask & (1u << k))) + continue; + + mov = vsir_program_iterator_current(it); + vsir_instruction_init_with_params(program, mov, &loc, VSIR_OP_MOV, 1, 1); + + dst = &mov->dst[0]; + vsir_dst_param_init(dst, VKD3DSPR_IDXTEMP, VSIR_DATA_F32, 2); + dst->reg.idx[0].offset = normaliser->idxtemp_idx; + dst->reg.idx[1].offset = p * normaliser->reg_count + q; + dst->reg.dimension = VSIR_DIMENSION_VEC4; + dst->write_mask = 1u << k; + + src = &mov->src[0]; + if (control_point_count) + { + vsir_src_param_init(src, normaliser->output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT, VSIR_DATA_F32, 3); + src->reg.idx[0].offset = input_component_count++; + src->reg.idx[1].offset = p; + src->reg.idx[2].offset = normaliser->element_idx; + } + else + { + vsir_src_param_init(src, VKD3DSPR_INPUT, VSIR_DATA_F32, 2); + src->reg.idx[0].offset = input_component_count++; + src->reg.idx[1].offset = normaliser->element_idx; + } + src->reg.dimension = VSIR_DIMENSION_VEC4; + src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + + vsir_program_iterator_next(it); + } + } + VKD3D_ASSERT(input_component_count == element->register_count); + } + + return VKD3D_OK; +} + +/* NOTE: This might be replaced by a single field in vsir_program at some point. */ +static unsigned int vsir_program_get_idxtemp_count(struct vsir_program *program) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + size_t count = 0; + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + if (ins->opcode != VSIR_OP_DCL_INDEXABLE_TEMP) + continue; + if (count < ins->declaration.indexable_temp.register_idx) + count = ins->declaration.indexable_temp.register_idx; + } + + return count; +} + +static enum vkd3d_result sysval_array_normaliser_dcl_indexable_temp( + struct sysval_array_normaliser *normaliser, struct vsir_program_iterator *it, size_t idx) +{ + struct vsir_program *program = normaliser->ctx->program; + unsigned int register_size = normaliser->reg_count; + struct vkd3d_shader_indexable_temp *t; + struct vkd3d_shader_instruction *ins; + unsigned int control_point_count; + + normaliser->idxtemp_idx = idx; + control_point_count = normaliser->output + ? 
program->output_control_point_count : program->input_control_point_count; + + /* The temporary holds one group of 'reg_count' registers per control point + * filled in by the input copy, which iterates over the control points of + * the signature being normalised; size it with that same count. */ + if (control_point_count && (!normaliser->output || vsir_opcode_is_fork_or_join_phase(normaliser->phase))) + register_size *= control_point_count; + + if (!(ins = vsir_program_iterator_insert_before_and_move(it, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init_with_params(program, ins, &normaliser->ctx->null_location, VSIR_OP_DCL_INDEXABLE_TEMP, 0, 0); + t = &ins->declaration.indexable_temp; + t->register_idx = normaliser->idxtemp_idx; + t->register_size = register_size; + t->alignment = 0; + t->data_type = VSIR_DATA_F32; + t->component_count = 4; + t->has_function_scope = false; + + vsir_program_iterator_next(it); + + return VKD3D_OK; +} + +/* Returns true if the second-to-last index of 'reg' has a zero offset and is + * relatively addressed by a VKD3DSPR_OUTPOINTID register without indices. */ +static bool vsir_program_validate_outpointid_control_point_index(const struct vkd3d_shader_register *reg) +{ + const struct vkd3d_shader_register_index *index; + + if (reg->idx_count < 2) + return false; + + index = &reg->idx[reg->idx_count - 2]; + if (index->offset) + return false; + if (!index->rel_addr || index->rel_addr->reg.type != VKD3DSPR_OUTPOINTID) + return false; + if (index->rel_addr->reg.idx_count) + return false; + return true; +} + +/* If a register refers to a signature element of index 'e' that has the + * sysval being handled by the normaliser, this maps the register as follows: + * + * v[e] -> x[idxtmp_idx][q] + * + * v[i][e] -> x[idxtmp_idx][i + q] + * on shaders without control points. + * + * v[p][e] -> x[idxtmp_idx][p * reg_count + q], + * on shaders with control points. + * + * v[i][p][e] -> x[idxtmp_idx][p * reg_count + i + q] + * on shaders with control points. + * + * o[e] -> x[idxtmp_idx][q] + * + * o[i][e] -> x[idxtmp_idx][i + q] + * on shaders without control points. + * + * o[p][e] -> x[idxtmp_idx][p * reg_count + q] + * if on HS fork/join phase, where it is a src. + * + * o[P][e] -> x[idxtmp_idx][q] + * if on HS control point phase, where it is a dst. 
+ * P is expected to always be the output control point ID. + * + * o[i][p][e] -> x[idxtmp_idx][p * reg_count + i + q] + * if on HS fork/join phase, where it is a src. + * + * o[i][P][e] -> x[idxtmp_idx][i + q] + * if on HS control point phase, where it is a dst. + * P is expected to always be the output control point ID. + * + * where: + * 'q' is the index of the register that matches signature element 'e' in + * the normaliser's internal list. + * 'idxtmp_idx' is the index of the indexable temp reserved by the + * normaliser. + * 'reg_count' is the number of registers in the normaliser's internal + * list. + * + * The swizzle (for source operands) is also combined with the mask of the + * relevant signature element 'e'. + */ +static enum vkd3d_result sysval_array_normaliser_map_register(struct sysval_array_normaliser *normaliser, + struct vsir_program_iterator *it, struct vkd3d_shader_register *reg, unsigned int *src_swizzle) +{ + struct vkd3d_shader_register_index i_idx = {0}, p_idx = {0}; + struct vsir_program *program = normaliser->ctx->program; + unsigned int element_index, control_point_count; + struct vkd3d_shader_instruction *ssa_ins; + struct shader_signature *signature; + struct signature_element *element; + struct vkd3d_shader_location loc; + unsigned int q; + + loc = vsir_program_iterator_current(it)->location; + + signature = normaliser->output ? &program->output_signature : &program->input_signature; + control_point_count = normaliser->output ? 
program->output_control_point_count + : program->input_control_point_count; + + for (unsigned int i = 0; i < reg->idx_count; ++i) + { + if (reg->idx[i].rel_addr) + sysval_array_normaliser_map_register(normaliser, it, + ®->idx[i].rel_addr->reg, ®->idx[i].rel_addr->swizzle); + } + + if (normaliser->output && reg->type != VKD3DSPR_OUTPUT) + return VKD3D_OK; + if (!normaliser->output && reg->type != VKD3DSPR_INPUT) + return VKD3D_OK; + + element_index = reg->idx[reg->idx_count - 1].offset; + element = &signature->elements[element_index]; + if (element->sysval_semantic != normaliser->sysval_semantic) + return VKD3D_OK; + + for (q = 0; q < normaliser->reg_count; ++q) + { + if (normaliser->regs[q].index == element->register_index) + break; + } + VKD3D_ASSERT(q < normaliser->reg_count); + + if (normaliser->output && normaliser->phase == VSIR_OP_HS_CONTROL_POINT_PHASE) + { + if (!vsir_program_validate_outpointid_control_point_index(reg)) + vkd3d_shader_error(normaliser->ctx->message_context, &loc, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, + "Control point index of output source operand is not OUTPOINTID.\n"); + } + + if (control_point_count) + { + if (reg->idx_count == 3) + { + i_idx = reg->idx[0]; + p_idx = reg->idx[1]; + } + else + { + p_idx = reg->idx[0]; + } + } + else if (reg->idx_count == 2) + { + i_idx = reg->idx[0]; + } + + reg->type = VKD3DSPR_IDXTEMP; + reg->idx[0].offset = normaliser->idxtemp_idx; + reg->idx[0].rel_addr = NULL; + reg->idx_count = 2; + + if (p_idx.rel_addr && !(normaliser->output && normaliser->phase == VSIR_OP_HS_CONTROL_POINT_PHASE)) + { + if (!(ssa_ins = vsir_program_iterator_insert_before_and_move(it, 1 + !!i_idx.rel_addr))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (!vsir_instruction_init_with_params(program, ssa_ins, &loc, VSIR_OP_IMUL_LOW, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ssa_ins->dst[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + ssa_ins->dst[0].reg.idx[0].offset = program->ssa_count++; + 
ssa_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ssa_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; + ssa_ins->src[0] = *p_idx.rel_addr; + src_param_init_const_uint(&ssa_ins->src[1], normaliser->reg_count); + + if (i_idx.rel_addr) + { + ssa_ins = vsir_program_iterator_next(it); + if (!vsir_instruction_init_with_params(program, ssa_ins, &loc, VSIR_OP_ADD, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_register_init(&ssa_ins->dst[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + ssa_ins->dst[0].reg.idx[0].offset = program->ssa_count++; + ssa_ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ssa_ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0; + vsir_register_init(&ssa_ins->src[0].reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + ssa_ins->src[0].reg.idx[0].offset = program->ssa_count - 2; + ssa_ins->src[1] = *i_idx.rel_addr; + } + + vsir_program_iterator_next(it); + + reg->idx[1].offset = normaliser->reg_count * p_idx.offset + i_idx.offset + q; + if (!(reg->idx[1].rel_addr = vsir_program_get_src_params(program, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + vsir_register_init(®->idx[1].rel_addr->reg, VKD3DSPR_SSA, VSIR_DATA_U32, 1); + reg->idx[1].rel_addr->reg.idx[0].offset = program->ssa_count - 1; + reg->idx[1].rel_addr->reg.dimension = VSIR_DIMENSION_VEC4; + reg->idx[1].rel_addr->swizzle = VKD3D_SHADER_SWIZZLE_X; + reg->idx[1].rel_addr->modifiers = 0; + } + else + { + reg->idx[1].offset = normaliser->reg_count * p_idx.offset + i_idx.offset + q; + reg->idx[1].rel_addr = i_idx.rel_addr; + } + + if (src_swizzle) + *src_swizzle = vsir_combine_swizzles(vsir_swizzle_from_writemask(element->mask), *src_swizzle); + + return VKD3D_OK; +} + +static enum vkd3d_result sysval_array_normaliser_map_instruction( + struct sysval_array_normaliser *normaliser, struct vsir_program_iterator *it) +{ + struct vkd3d_shader_instruction *ins = vsir_program_iterator_current(it); + unsigned int src_count, dst_count; + enum vkd3d_result res; + + if (vsir_instruction_is_dcl(ins)) + return VKD3D_OK; + + 
dst_count = ins->dst_count; + src_count = ins->src_count; + + for (unsigned int k = 0; k < dst_count; ++k) + { + ins = vsir_program_iterator_current(it); + if ((res = sysval_array_normaliser_map_register(normaliser, it, &ins->dst[k].reg, NULL)) < 0) + return res; + } + + for (unsigned int k = 0; k < src_count; ++k) + { + ins = vsir_program_iterator_current(it); + if ((res = sysval_array_normaliser_map_register(normaliser, it, + &ins->src[k].reg, &ins->src[k].swizzle)) < 0) + return res; + } + + return VKD3D_OK; +} + +/* Adjust the signature element index held in the last index of 'reg', and + * recursively in the registers used as its relative addresses, for the + * removal of element 'index' from the signature addressed by registers of + * type 'type': indices greater than 'index' are decremented. No register of + * that type may still refer to the removed element. */ +static void shader_register_remove_signature_element(struct vkd3d_shader_register *reg, + enum vkd3d_shader_register_type type, unsigned int index) +{ + unsigned int current_idx; + + for (unsigned int i = 0; i < reg->idx_count; ++i) + { + if (reg->idx[i].rel_addr) + shader_register_remove_signature_element(&reg->idx[i].rel_addr->reg, type, index); + } + + if (reg->type != type) + return; + + VKD3D_ASSERT(!reg->idx[reg->idx_count - 1].rel_addr); + current_idx = reg->idx[reg->idx_count - 1].offset; + VKD3D_ASSERT(current_idx != index); + if (current_idx > index) + --reg->idx[reg->idx_count - 1].offset; +} + +static void vsir_program_remove_signature_element(struct vsir_program *program, + enum vkd3d_shader_register_type type, unsigned int index) +{ + struct vsir_program_iterator it = vsir_program_iterator(&program->instructions); + struct vkd3d_shader_instruction *ins; + struct shader_signature *signature; + + switch (type) + { + case VKD3DSPR_INPUT: + signature = &program->input_signature; + break; + case VKD3DSPR_OUTPUT: + signature = &program->output_signature; + break; + case VKD3DSPR_PATCHCONST: + signature = &program->patch_constant_signature; + break; + default: + vkd3d_unreachable(); + } + + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + if (vsir_instruction_is_dcl(ins)) + continue; + for (unsigned int i = 0; i < ins->dst_count; ++i) + shader_register_remove_signature_element(&ins->dst[i].reg, type, index); + for 
(unsigned int i = 0; i < ins->src_count; ++i) + shader_register_remove_signature_element(&ins->src[i].reg, type, index); + } + + memmove(&signature->elements[index], &signature->elements[index + 1], + sizeof(*signature->elements) * (signature->element_count - 1 - index)); + --signature->element_count; +} + +static void sysval_array_normaliser_remove_old_signature_elements(struct sysval_array_normaliser *normaliser) +{ + struct vsir_program *program = normaliser->ctx->program; + enum vkd3d_shader_register_type type; + struct shader_signature *signature; + struct signature_element *element; + + signature = normaliser->output ? &program->output_signature : &program->input_signature; + type = normaliser->output ? VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + + for (int i = signature->element_count - 2; i >= 0; --i) + { + element = &signature->elements[i]; + if (element->sysval_semantic != normaliser->sysval_semantic) + continue; + TRACE("Removing %s signature element index %u.\n", normaliser->output ? 
"output" : "input", i); + vsir_program_remove_signature_element(program, type, i); + } +} + +static enum vkd3d_result vsir_program_normalise_sysval_array(struct vsir_transformation_context *ctx, + const char *semantic_name, enum vkd3d_shader_sysval_semantic sysval_semantic, bool output) +{ + struct vsir_program *program = ctx->program; + struct sysval_array_normaliser normaliser; + struct vkd3d_shader_instruction *ins; + struct vsir_program_iterator it; + bool declarations = true; + enum vkd3d_result res; + + if ((res = sysval_array_normaliser_init(ctx, semantic_name, sysval_semantic, output, &normaliser)) < 0) + return res; + + if (!normaliser.reg_count) + return VKD3D_OK; + + if (!output && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + return VKD3D_OK; + + if (TRACE_ON()) + vsir_program_trace(program); + + it = vsir_program_iterator(&program->instructions); + for (ins = vsir_program_iterator_head(&it); ins; ins = vsir_program_iterator_next(&it)) + { + if (ins->opcode == VSIR_OP_HS_DECLS || ins->opcode == VSIR_OP_HS_CONTROL_POINT_PHASE + || ins->opcode == VSIR_OP_HS_FORK_PHASE || ins->opcode == VSIR_OP_HS_JOIN_PHASE) + { + normaliser.phase = ins->opcode; + declarations = true; + continue; + } + + if (declarations && !vsir_instruction_is_dcl(ins) && ins->opcode != VSIR_OP_NOP) + { + unsigned int idxtemp_idx = vsir_program_get_idxtemp_count(program) + 1; + + declarations = false; + + if ((res = sysval_array_normaliser_dcl_indexable_temp(&normaliser, &it, idxtemp_idx)) < 0) + return res; + + if (vsir_program_iterator_current(&it)->opcode == VSIR_OP_LABEL) + ins = vsir_program_iterator_next(&it); + + if ((!output || vsir_opcode_is_fork_or_join_phase(normaliser.phase)) + && (res = sysval_array_normaliser_add_input_copy(&normaliser, &it)) < 0) + return res; + } + + if (!declarations) + { + if (ins->opcode == VSIR_OP_RET || ins->opcode == VSIR_OP_EMIT || ins->opcode == VSIR_OP_EMIT_STREAM) + { + if ((output && 
!vsir_opcode_is_fork_or_join_phase(normaliser.phase)) + && (res = sysval_array_normaliser_add_output_copy(&normaliser, &it)) < 0) + return res; + } + else + { + if ((res = sysval_array_normaliser_map_instruction(&normaliser, &it)) < 0) + return res; + } + } + } + VKD3D_ASSERT(!declarations); + if (TRACE_ON()) + vsir_program_trace(program); + sysval_array_normaliser_remove_old_signature_elements(&normaliser); + + return VKD3D_OK; +} + +/* This pass transform clip/cull system values from the Direct3D convention of + * 2 4-component registers, into the SPIR-V/GLSL convention of 8-element + * scalar float arrays. */ +static enum vkd3d_result vsir_program_normalise_clip_cull( + struct vsir_program *program, struct vsir_transformation_context *ctx) +{ + enum vkd3d_result res; + + if ((res = vsir_program_normalise_sysval_array(ctx, "SV_ClipDistance", VKD3D_SHADER_SV_CLIP_DISTANCE, false)) < 0) + return res; + if ((res = vsir_program_normalise_sysval_array(ctx, "SV_ClipDistance", VKD3D_SHADER_SV_CLIP_DISTANCE, true)) < 0) + return res; + if ((res = vsir_program_normalise_sysval_array(ctx, "SV_CullDistance", VKD3D_SHADER_SV_CULL_DISTANCE, false)) < 0) + return res; + if ((res = vsir_program_normalise_sysval_array(ctx, "SV_CullDistance", VKD3D_SHADER_SV_CULL_DISTANCE, true)) < 0) + return res; + + program->normalisation_flags.normalised_clip_cull_arrays = true; + + return VKD3D_OK; +} + static bool is_pre_rasterization_shader(enum vkd3d_shader_type type) { return type == VKD3D_SHADER_TYPE_VERTEX @@ -10529,7 +11267,7 @@ static void vsir_validate_io_register(struct validation_context *ctx, const stru } element = &signature->elements[signature_idx]; - is_array = vsir_signature_element_is_array(element); + is_array = vsir_signature_element_is_array(element, &ctx->program->normalisation_flags); expected_idx_count = 1 + !!has_control_point + !!is_array; control_point_index = !!is_array; @@ -14243,6 +14981,7 @@ enum vkd3d_result vsir_program_transform(struct vsir_program *program, 
uint64_t vsir_transform(&ctx, vsir_program_apply_flat_interpolation); vsir_transform(&ctx, vsir_program_insert_alpha_test); vsir_transform(&ctx, vsir_program_insert_clip_planes); + vsir_transform(&ctx, vsir_program_normalise_clip_cull); vsir_transform(&ctx, vsir_program_insert_point_size); vsir_transform(&ctx, vsir_program_insert_point_size_clamp); vsir_transform(&ctx, vsir_program_insert_point_coord); diff --git a/libs/vkd3d-shader/spirv.c b/libs/vkd3d-shader/spirv.c index 5dd529451..a0059cf7d 100644 --- a/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d-shader/spirv.c @@ -5560,11 +5560,13 @@ static const struct vkd3d_symbol *spirv_compiler_emit_io_register(struct spirv_c const struct vkd3d_spirv_builtin *builtin; struct vkd3d_symbol reg_symbol; SpvStorageClass storage_class; + unsigned int array_size; uint32_t write_mask, id; struct rb_entry *entry; - VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); - VKD3D_ASSERT(reg->idx_count < 2); + VKD3D_ASSERT(reg->idx_count < 1 || !reg->idx[0].rel_addr); + VKD3D_ASSERT(reg->idx_count < 2 || !reg->idx[1].rel_addr); + VKD3D_ASSERT(reg->idx_count < 3); if (reg->type == VKD3DSPR_RASTOUT && reg->idx[0].offset == VSIR_RASTOUT_POINT_SIZE) { @@ -5582,7 +5584,8 @@ static const struct vkd3d_symbol *spirv_compiler_emit_io_register(struct spirv_c if ((entry = rb_get(&compiler->symbol_table, &reg_symbol))) return RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); - id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, 0); + array_size = (reg->idx_count > 1) ? reg->idx[0].offset : 0; + id = spirv_compiler_emit_builtin_variable(compiler, builtin, storage_class, array_size); spirv_compiler_emit_register_execution_mode(compiler, reg->type); spirv_compiler_emit_register_debug_name(builder, id, reg); @@ -5646,8 +5649,7 @@ static void spirv_compiler_emit_input(struct spirv_compiler *compiler, array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 
0 : compiler->input_control_point_count); - if (!vsir_signature_element_is_array(signature_element) - && (!vsir_sysval_semantic_is_clip_cull(signature_element->sysval_semantic) || array_sizes[1])) + if (!vsir_signature_element_is_array(signature_element, &compiler->program->normalisation_flags)) array_sizes[0] = 0; write_mask = signature_element->mask; @@ -5780,88 +5782,6 @@ static bool is_dual_source_blending(const struct spirv_compiler *compiler) return compiler->shader_type == VKD3D_SHADER_TYPE_PIXEL && info && info->dual_source_blending; } -static void calculate_clip_or_cull_distance_mask(const struct signature_element *e, uint32_t *mask) -{ - unsigned int write_mask; - - if (e->semantic_index >= sizeof(*mask) * CHAR_BIT / VKD3D_VEC4_SIZE) - { - FIXME("Invalid semantic index %u for clip/cull distance.\n", e->semantic_index); - return; - } - - write_mask = e->mask; - *mask |= (write_mask & VKD3DSP_WRITEMASK_ALL) << (VKD3D_VEC4_SIZE * e->semantic_index); -} - -/* Emits arrayed SPIR-V built-in variables. 
*/ -static void spirv_compiler_emit_shader_signature_outputs(struct spirv_compiler *compiler) -{ - const struct shader_signature *output_signature = &compiler->program->output_signature; - uint32_t clip_distance_mask = 0, clip_distance_id = 0; - uint32_t cull_distance_mask = 0, cull_distance_id = 0; - const struct vkd3d_spirv_builtin *builtin; - unsigned int i, count; - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &clip_distance_mask); - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - calculate_clip_or_cull_distance_mask(e, &cull_distance_mask); - break; - - default: - break; - } - } - - if (clip_distance_mask) - { - count = vkd3d_popcount(clip_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CLIP_DISTANCE); - clip_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - if (cull_distance_mask) - { - count = vkd3d_popcount(cull_distance_mask); - builtin = get_spirv_builtin_for_sysval(compiler, VKD3D_SHADER_SV_CULL_DISTANCE); - cull_distance_id = spirv_compiler_emit_builtin_variable(compiler, - builtin, SpvStorageClassOutput, count); - } - - for (i = 0; i < output_signature->element_count; ++i) - { - const struct signature_element *e = &output_signature->elements[i]; - - switch (e->sysval_semantic) - { - case VKD3D_SHADER_SV_CLIP_DISTANCE: - compiler->output_info[i].id = clip_distance_id; - compiler->output_info[i].data_type = VSIR_DATA_F32; - compiler->output_info[i].array_element_mask = clip_distance_mask; - break; - - case VKD3D_SHADER_SV_CULL_DISTANCE: - compiler->output_info[i].id = cull_distance_id; - compiler->output_info[i].data_type = VSIR_DATA_F32; - compiler->output_info[i].array_element_mask = cull_distance_mask; - break; - - default: - break; - } - } -} - static 
uint32_t spirv_compiler_emit_shader_phase_builtin_variable(struct spirv_compiler *compiler, const struct vkd3d_spirv_builtin *builtin, const unsigned int *array_sizes, unsigned int size_count) { @@ -5917,7 +5837,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, sysval = VKD3D_SHADER_SV_NONE; array_sizes[0] = signature_element->register_count; array_sizes[1] = (reg_type == VKD3DSPR_PATCHCONST ? 0 : compiler->output_control_point_count); - if (!vsir_signature_element_is_array(signature_element)) + if (!vsir_signature_element_is_array(signature_element, &compiler->program->normalisation_flags)) array_sizes[0] = 0; builtin = vkd3d_get_spirv_builtin(compiler, reg_type, sysval); @@ -10944,9 +10864,6 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, || (program->shader_version.type == VKD3D_SHADER_TYPE_HULL && !spirv_compiler_is_opengl_target(compiler))) spirv_compiler_emit_tessellator_domain(compiler, program->tess_domain); - if (compiler->shader_type != VKD3D_SHADER_TYPE_HULL) - spirv_compiler_emit_shader_signature_outputs(compiler); - it = vsir_program_iterator(&program->instructions); for (ins = vsir_program_iterator_head(&it); ins && result >= 0; ins = vsir_program_iterator_next(&it)) { @@ -11048,6 +10965,7 @@ int spirv_compile(struct vsir_program *program, uint64_t config_flags, return ret; VKD3D_ASSERT(program->normalisation_level == VSIR_NORMALISED_SM6); + VKD3D_ASSERT(program->normalisation_flags.normalised_clip_cull_arrays); VKD3D_ASSERT(program->has_descriptor_info); VKD3D_ASSERT(program->has_no_modifiers); diff --git a/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d-shader/vkd3d_shader_private.h index b01e62298..c77cdb5d6 100644 --- a/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d-shader/vkd3d_shader_private.h @@ -983,6 +983,11 @@ struct vkd3d_shader_version uint8_t minor; }; +struct vsir_normalisation_flags +{ + bool normalised_clip_cull_arrays; +}; + struct 
vkd3d_shader_immediate_constant_buffer { unsigned int register_idx; @@ -1200,7 +1205,8 @@ struct signature_element unsigned int target_location; }; -static inline bool vsir_signature_element_is_array(const struct signature_element *element) +static inline bool vsir_signature_element_is_array(const struct signature_element *element, + const struct vsir_normalisation_flags *flags) { enum vkd3d_shader_sysval_semantic semantic = element->sysval_semantic; @@ -1208,6 +1214,8 @@ static inline bool vsir_signature_element_is_array(const struct signature_elemen return true; if (vsir_sysval_semantic_is_tess_factor(semantic)) return true; + if (flags->normalised_clip_cull_arrays && vsir_sysval_semantic_is_clip_cull(semantic)) + return true; return false; } @@ -1637,7 +1645,6 @@ struct vsir_program bool has_fog; uint8_t diffuse_written_mask; enum vsir_control_flow_type cf_type; - enum vsir_normalisation_level normalisation_level; bool has_no_modifiers; enum vkd3d_tessellator_domain tess_domain; enum vkd3d_shader_tessellator_partitioning tess_partitioning; @@ -1645,6 +1652,9 @@ struct vsir_program enum vkd3d_primitive_type input_primitive, output_topology; unsigned int vertices_out_count; + enum vsir_normalisation_level normalisation_level; + struct vsir_normalisation_flags normalisation_flags; + uint32_t io_dcls[VKD3D_BITMAP_SIZE(VKD3DSPR_COUNT)]; struct vsir_features features; diff --git a/tests/hlsl/clip-cull-distance.shader_test b/tests/hlsl/clip-cull-distance.shader_test index 98985a278..ca0c4ffe6 100644 --- a/tests/hlsl/clip-cull-distance.shader_test +++ b/tests/hlsl/clip-cull-distance.shader_test @@ -174,7 +174,7 @@ float4 main(struct vertex_data vertex) : SV_Target [test] clear rtv 0 0.0 0.0 0.0 1.0 -todo draw triangle list 27 +todo(sm<6) draw triangle list 27 probe rtv 0 (160, 120) f32(10.0, 30.0, 50.0, 4984) probe rtv 0 (160, 240) f32(10.0, 30.0, 50.0, 4984) probe rtv 0 (160, 360) f32(10.0, 30.0, 50.0, 4984) @@ -221,7 +221,7 @@ float4 main(struct vertex_data vertex) : 
SV_Target [test] clear rtv 0 0.0 0.0 0.0 1.0 -todo draw triangle list 27 +todo(sm<6) draw triangle list 27 probe rtv 0 (160, 120) f32(30.0, 50.0, 1.0, 4984) probe rtv 0 (160, 240) f32(30.0, 50.0, 1.0, 4984) probe rtv 0 (160, 360) f32(30.0, 50.0, 1.0, 4984) @@ -568,16 +568,16 @@ void main(triangle vs_out input[3], inout TriangleStream stream) [test] clear rtv 0 1.0 1.0 0.0 1.0 -todo draw triangle strip 4 -probe rtv 0 (160, 120) rgba(1.0, 1.0, 0.0, 1.0) -probe rtv 0 (160, 240) rgba(1.0, 1.0, 0.0, 1.0) -probe rtv 0 (160, 360) rgba(1.0, 1.0, 0.0, 1.0) -probe rtv 0 (320, 120) rgba(1.0, 1.0, 0.0, 1.0) -probe rtv 0 (320, 240) rgba(0.0, 1.0, 0.0, 1.0) -probe rtv 0 (320, 360) rgba(1.0, 1.0, 0.0, 1.0) -probe rtv 0 (480, 120) rgba(1.0, 1.0, 0.0, 1.0) -probe rtv 0 (480, 240) rgba(1.0, 1.0, 0.0, 1.0) -probe rtv 0 (480, 360) rgba(1.0, 1.0, 0.0, 1.0) +todo(sm<6) draw triangle strip 4 +probe rtv 0 (160, 120) f32(1.0, 1.0, 0.0, 1.0) +probe rtv 0 (160, 240) f32(1.0, 1.0, 0.0, 1.0) +probe rtv 0 (160, 360) f32(1.0, 1.0, 0.0, 1.0) +probe rtv 0 (320, 120) f32(1.0, 1.0, 0.0, 1.0) +probe rtv 0 (320, 240) f32(0.0, 1.0, 0.0, 1.0) +probe rtv 0 (320, 360) f32(1.0, 1.0, 0.0, 1.0) +probe rtv 0 (480, 120) f32(1.0, 1.0, 0.0, 1.0) +probe rtv 0 (480, 240) f32(1.0, 1.0, 0.0, 1.0) +probe rtv 0 (480, 360) f32(1.0, 1.0, 0.0, 1.0) % Generate clip and cull distances in the geometry shader.