Giovanni Mascellani 51f13391e6 vkd3d-shader/ir: Introduce a simple control flow graph structurizer.
The structurizer is implemented along the lines of what is usually called
the "structured program theorem": the control flow is completely
virtualized by mean of an additional TEMP register which stores the
block index which is currently running. The whole program is then
converted to a huge switch construction enclosed in a loop, executing
at each iteration the appropriate block and updating the register
depending on block jump instruction.

The algorithm's generality is also its major weakness: it accepts any
input program, even if its CFG is not reducible, but the output
program lacks any useful convergence information. It satisfies the
letter of the SPIR-V requirements, but it is expected that it will
be very inefficient to run on a GPU (unless a downstream compiler is
able to devirtualize the control flow and do a proper convergence
analysis pass). The algorithm is however very simple, and good enough
to at least pass tests, enabling further development. A better
alternative is expected to be upstreamed incrementally.

Side note: the structured program theorem is often called the
Böhm-Jacopini theorem; Böhm and Jacopini did indeed prove a variation
of it, but their algorithm is different from what is commontly attributed
to them and implemented here, so I opted for not using their name.
2024-02-06 23:07:07 +01:00

3934 lines
147 KiB
C

/*
* Copyright 2023 Conor McCarthy for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "vkd3d_shader_private.h"
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve)
{
program->shader_version = *version;
return shader_instruction_array_init(&program->instructions, reserve);
}
void vsir_program_cleanup(struct vsir_program *program)
{
size_t i;
for (i = 0; i < program->block_name_count; ++i)
vkd3d_free((void *)program->block_names[i]);
vkd3d_free(program->block_names);
shader_instruction_array_destroy(&program->instructions);
}
static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg)
{
return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID;
}
static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction)
{
enum vkd3d_shader_opcode handler_idx = instruction->handler_idx;
return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT)
|| handler_idx == VKD3DSIH_HS_DECLS;
}
static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins)
{
struct vkd3d_shader_location location = ins->location;
vsir_instruction_init(ins, &location, VKD3DSIH_NOP);
}
static void remove_dcl_temps(struct vsir_program *program)
{
unsigned int i;
for (i = 0; i < program->instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
if (ins->handler_idx == VKD3DSIH_DCL_TEMPS)
vkd3d_shader_instruction_make_nop(ins);
}
}
static bool vsir_instruction_init_with_params(struct vsir_program *program,
struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count)
{
vsir_instruction_init(ins, location, handler_idx);
ins->dst_count = dst_count;
ins->src_count = src_count;
if (!(ins->dst = vsir_program_get_dst_params(program, ins->dst_count)))
{
ERR("Failed to allocate %u destination parameters.\n", dst_count);
return false;
}
if (!(ins->src = vsir_program_get_src_params(program, ins->src_count)))
{
ERR("Failed to allocate %u source parameters.\n", src_count);
return false;
}
memset(ins->dst, 0, sizeof(*ins->dst) * ins->dst_count);
memset(ins->src, 0, sizeof(*ins->src) * ins->src_count);
return true;
}
static enum vkd3d_result instruction_array_lower_texkills(struct vkd3d_shader_parser *parser)
{
struct vsir_program *program = &parser->program;
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
struct vkd3d_shader_instruction *texkill_ins, *ins;
unsigned int components_read = 3 + (program->shader_version.major >= 2);
unsigned int tmp_idx = ~0u;
unsigned int i, k;
for (i = 0; i < instructions->count; ++i)
{
texkill_ins = &instructions->elements[i];
if (texkill_ins->handler_idx != VKD3DSIH_TEXKILL)
continue;
if (!shader_instruction_array_insert_at(instructions, i + 1, components_read + 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
if (tmp_idx == ~0u)
tmp_idx = program->temp_count++;
/* tmp = ins->dst[0] < 0 */
ins = &instructions->elements[i + 1];
if (!vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_LTO, 1, 2))
return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
ins->dst[0].reg.idx[0].offset = tmp_idx;
ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
ins->src[0].reg = texkill_ins->dst[0].reg;
vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
ins->src[1].reg.u.immconst_f32[0] = 0.0f;
ins->src[1].reg.u.immconst_f32[1] = 0.0f;
ins->src[1].reg.u.immconst_f32[2] = 0.0f;
ins->src[1].reg.u.immconst_f32[3] = 0.0f;
/* tmp.x = tmp.x || tmp.y */
/* tmp.x = tmp.x || tmp.z */
/* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */
for (k = 1; k < components_read; ++k)
{
ins = &instructions->elements[i + 1 + k];
if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_OR, 1, 2)))
return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
ins->dst[0].reg.idx[0].offset = tmp_idx;
ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0;
vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
ins->src[0].reg.idx[0].offset = tmp_idx;
ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
ins->src[1].reg.idx[0].offset = tmp_idx;
ins->src[1].swizzle = vkd3d_shader_create_swizzle(k, k, k, k);
}
/* discard_nz tmp.x */
ins = &instructions->elements[i + 1 + components_read];
if (!(vsir_instruction_init_with_params(program, ins, &texkill_ins->location, VKD3DSIH_DISCARD, 0, 1)))
return VKD3D_ERROR_OUT_OF_MEMORY;
ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
ins->src[0].reg.idx[0].offset = tmp_idx;
ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
/* Make the original instruction no-op */
vkd3d_shader_instruction_make_nop(texkill_ins);
}
return VKD3D_OK;
}
static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg,
unsigned int instance_id)
{
unsigned int i;
for (i = 0; i < reg->idx_count; ++i)
{
if (reg->idx[i].rel_addr && shader_register_is_phase_instance_id(&reg->idx[i].rel_addr->reg))
{
reg->idx[i].rel_addr = NULL;
reg->idx[i].offset += instance_id;
}
}
}
static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_instruction *ins,
unsigned int instance_id)
{
struct vkd3d_shader_register *reg;
unsigned int i;
for (i = 0; i < ins->src_count; ++i)
{
reg = (struct vkd3d_shader_register *)&ins->src[i].reg;
if (shader_register_is_phase_instance_id(reg))
{
vsir_register_init(reg, VKD3DSPR_IMMCONST, reg->data_type, 0);
reg->u.immconst_u32[0] = instance_id;
continue;
}
shader_register_eliminate_phase_addressing(reg, instance_id);
}
for (i = 0; i < ins->dst_count; ++i)
shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id);
}
static const struct vkd3d_shader_varying_map *find_varying_map(
const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx)
{
unsigned int i;
for (i = 0; i < varying_map->varying_count; ++i)
{
if (varying_map->varying_map[i].output_signature_index == signature_idx)
return &varying_map->varying_map[i];
}
return NULL;
}
static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser,
const struct vkd3d_shader_compile_info *compile_info)
{
struct shader_signature *signature = &parser->shader_desc.output_signature;
const struct vkd3d_shader_varying_map_info *varying_map;
unsigned int i;
if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO)))
return VKD3D_OK;
for (i = 0; i < signature->element_count; ++i)
{
const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i);
struct signature_element *e = &signature->elements[i];
if (map)
{
unsigned int input_mask = map->input_mask;
e->target_location = map->input_register_index;
/* It is illegal in Vulkan if the next shader uses the same varying
* location with a different mask. */
if (input_mask && input_mask != e->mask)
{
vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
"Aborting due to not yet implemented feature: "
"Output mask %#x does not match input mask %#x.",
e->mask, input_mask);
return VKD3D_ERROR_NOT_IMPLEMENTED;
}
}
else
{
e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED;
}
}
for (i = 0; i < varying_map->varying_count; ++i)
{
if (varying_map->varying_map[i].output_signature_index >= signature->element_count)
{
vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
"Aborting due to not yet implemented feature: "
"The next stage consumes varyings not written by this stage.");
return VKD3D_ERROR_NOT_IMPLEMENTED;
}
}
return VKD3D_OK;
}
struct hull_flattener
{
struct vkd3d_shader_instruction_array instructions;
unsigned int instance_count;
unsigned int phase_body_idx;
enum vkd3d_shader_opcode phase;
struct vkd3d_shader_location last_ret_location;
};
static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener)
{
return flattener->phase == VKD3DSIH_HS_FORK_PHASE || flattener->phase == VKD3DSIH_HS_JOIN_PHASE;
}
struct shader_phase_location
{
unsigned int index;
unsigned int instance_count;
unsigned int instruction_count;
};
struct shader_phase_location_array
{
/* Unlikely worst case: one phase for each component of each output register. */
struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE];
unsigned int count;
};
static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser,
unsigned int index, struct shader_phase_location_array *locations)
{
struct vkd3d_shader_instruction *ins = &normaliser->instructions.elements[index];
struct shader_phase_location *loc;
bool b;
if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE)
{
b = flattener_is_in_fork_or_join_phase(normaliser);
/* Reset the phase info. */
normaliser->phase_body_idx = ~0u;
normaliser->phase = ins->handler_idx;
normaliser->instance_count = 1;
/* Leave the first occurrence and delete the rest. */
if (b)
vkd3d_shader_instruction_make_nop(ins);
return;
}
else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
|| ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT)
{
normaliser->instance_count = ins->declaration.count + !ins->declaration.count;
vkd3d_shader_instruction_make_nop(ins);
return;
}
else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id(
&ins->declaration.dst.reg))
{
vkd3d_shader_instruction_make_nop(ins);
return;
}
if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins))
return;
if (normaliser->phase_body_idx == ~0u)
normaliser->phase_body_idx = index;
if (ins->handler_idx == VKD3DSIH_RET)
{
normaliser->last_ret_location = ins->location;
vkd3d_shader_instruction_make_nop(ins);
if (locations->count >= ARRAY_SIZE(locations->locations))
{
FIXME("Insufficient space for phase location.\n");
return;
}
loc = &locations->locations[locations->count++];
loc->index = normaliser->phase_body_idx;
loc->instance_count = normaliser->instance_count;
loc->instruction_count = index - normaliser->phase_body_idx;
}
}
static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser,
struct shader_phase_location_array *locations)
{
struct shader_phase_location *loc;
unsigned int i, j, k, end, count;
for (i = 0, count = 0; i < locations->count; ++i)
count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count;
if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count))
return VKD3D_ERROR_OUT_OF_MEMORY;
end = normaliser->instructions.count;
normaliser->instructions.count += count;
for (i = locations->count; i > 0; --i)
{
loc = &locations->locations[i - 1];
j = loc->index + loc->instruction_count;
memmove(&normaliser->instructions.elements[j + count], &normaliser->instructions.elements[j],
(end - j) * sizeof(*normaliser->instructions.elements));
end = j;
count -= (loc->instance_count - 1) * loc->instruction_count;
loc->index += count;
}
for (i = 0, count = 0; i < locations->count; ++i)
{
loc = &locations->locations[i];
/* Make a copy of the non-dcl instructions for each instance. */
for (j = 1; j < loc->instance_count; ++j)
{
for (k = 0; k < loc->instruction_count; ++k)
{
if (!shader_instruction_array_clone_instruction(&normaliser->instructions,
loc->index + loc->instruction_count * j + k, loc->index + k))
return VKD3D_ERROR_OUT_OF_MEMORY;
}
}
/* Replace each reference to the instance id with a constant instance id. */
for (j = 0; j < loc->instance_count; ++j)
{
for (k = 0; k < loc->instruction_count; ++k)
shader_instruction_eliminate_phase_instance_id(
&normaliser->instructions.elements[loc->index + loc->instruction_count * j + k], j);
}
}
return VKD3D_OK;
}
void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type,
enum vkd3d_data_type data_type, unsigned int idx_count)
{
reg->type = reg_type;
reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
reg->non_uniform = false;
reg->data_type = data_type;
reg->idx[0].offset = ~0u;
reg->idx[0].rel_addr = NULL;
reg->idx[0].is_in_bounds = false;
reg->idx[1].offset = ~0u;
reg->idx[1].rel_addr = NULL;
reg->idx[1].is_in_bounds = false;
reg->idx[2].offset = ~0u;
reg->idx[2].rel_addr = NULL;
reg->idx[2].is_in_bounds = false;
reg->idx_count = idx_count;
reg->dimension = VSIR_DIMENSION_SCALAR;
reg->alignment = 0;
}
void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type,
enum vkd3d_data_type data_type, unsigned int idx_count)
{
vsir_register_init(&param->reg, reg_type, data_type, idx_count);
param->swizzle = 0;
param->modifiers = VKD3DSPSM_NONE;
}
void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type,
enum vkd3d_data_type data_type, unsigned int idx_count)
{
vsir_register_init(&param->reg, reg_type, data_type, idx_count);
param->write_mask = VKD3DSP_WRITEMASK_0;
param->modifiers = VKD3DSPDM_NONE;
param->shift = 0;
}
void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id)
{
vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UINT, 1);
param->reg.dimension = VSIR_DIMENSION_NONE;
param->reg.idx[0].offset = label_id;
}
static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
{
vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
src->reg.idx[0].offset = idx;
}
static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
{
vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
dst->reg.idx[0].offset = idx;
}
static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx)
{
vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
dst->reg.idx[0].offset = idx;
dst->write_mask = VKD3DSP_WRITEMASK_0;
}
static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx)
{
vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
src->reg.idx[0].offset = idx;
}
static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value)
{
vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
src->reg.u.immconst_u32[0] = value;
}
void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
enum vkd3d_shader_opcode handler_idx)
{
memset(ins, 0, sizeof(*ins));
ins->location = *location;
ins->handler_idx = handler_idx;
}
static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins,
const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program)
{
struct vkd3d_shader_src_param *src_param;
if (!(src_param = vsir_program_get_src_params(program, 1)))
return false;
vsir_src_param_init_label(src_param, label_id);
vsir_instruction_init(ins, location, VKD3DSIH_LABEL);
ins->src = src_param;
ins->src_count = 1;
return true;
}
static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions)
{
struct hull_flattener flattener = {*src_instructions};
struct vkd3d_shader_instruction_array *instructions;
struct shader_phase_location_array locations;
enum vkd3d_result result = VKD3D_OK;
unsigned int i;
instructions = &flattener.instructions;
flattener.phase = VKD3DSIH_INVALID;
for (i = 0, locations.count = 0; i < instructions->count; ++i)
flattener_eliminate_phase_related_dcls(&flattener, i, &locations);
if ((result = flattener_flatten_phases(&flattener, &locations)) < 0)
return result;
if (flattener.phase != VKD3DSIH_INVALID)
{
if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET);
}
*src_instructions = flattener.instructions;
return result;
}
struct control_point_normaliser
{
struct vkd3d_shader_instruction_array instructions;
enum vkd3d_shader_opcode phase;
struct vkd3d_shader_src_param *outpointid_param;
};
static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser)
{
return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
}
static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param(
struct vkd3d_shader_instruction_array *instructions)
{
struct vkd3d_shader_src_param *rel_addr;
if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1)))
return NULL;
vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0);
rel_addr->swizzle = 0;
rel_addr->modifiers = 0;
return rel_addr;
}
static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param,
struct control_point_normaliser *normaliser)
{
struct vkd3d_shader_register *reg = &dst_param->reg;
if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT)
{
/* The TPF reader validates idx_count. */
assert(reg->idx_count == 1);
reg->idx[1] = reg->idx[0];
/* The control point id param is implicit here. Avoid later complications by inserting it. */
reg->idx[0].offset = 0;
reg->idx[0].rel_addr = normaliser->outpointid_param;
++reg->idx_count;
}
}
static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e,
enum vkd3d_shader_register_type reg_type, unsigned int idx_count)
{
param->write_mask = e->mask;
param->modifiers = 0;
param->shift = 0;
vsir_register_init(&param->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count);
}
static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser,
const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst,
const struct vkd3d_shader_location *location)
{
struct vkd3d_shader_instruction *ins;
struct vkd3d_shader_dst_param *param;
const struct signature_element *e;
unsigned int i, count;
for (i = 0, count = 1; i < s->element_count; ++i)
count += !!s->elements[i].used_mask;
if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count))
return VKD3D_ERROR_OUT_OF_MEMORY;
memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst],
(normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements));
normaliser->instructions.count += count;
ins = &normaliser->instructions.elements[dst];
vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE);
ins->flags = 1;
++ins;
for (i = 0; i < s->element_count; ++i)
{
e = &s->elements[i];
if (!e->used_mask)
continue;
if (e->sysval_semantic != VKD3D_SHADER_SV_NONE)
{
vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV);
param = &ins->declaration.register_semantic.reg;
ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic);
}
else
{
vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT);
param = &ins->declaration.dst;
}
shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2);
param->reg.idx[0].offset = input_control_point_count;
param->reg.idx[1].offset = e->register_index;
param->write_mask = e->mask;
++ins;
}
return VKD3D_OK;
}
static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io(
struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature)
{
struct vkd3d_shader_instruction_array *instructions;
struct control_point_normaliser normaliser;
unsigned int input_control_point_count;
struct vkd3d_shader_location location;
struct vkd3d_shader_instruction *ins;
enum vkd3d_result ret;
unsigned int i, j;
if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions)))
{
ERR("Failed to allocate src param.\n");
return VKD3D_ERROR_OUT_OF_MEMORY;
}
normaliser.instructions = *src_instructions;
instructions = &normaliser.instructions;
normaliser.phase = VKD3DSIH_INVALID;
for (i = 0; i < normaliser.instructions.count; ++i)
{
ins = &instructions->elements[i];
switch (ins->handler_idx)
{
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
normaliser.phase = ins->handler_idx;
break;
default:
if (vsir_instruction_is_dcl(ins))
break;
for (j = 0; j < ins->dst_count; ++j)
shader_dst_param_normalise_outpointid(&ins->dst[j], &normaliser);
break;
}
}
normaliser.phase = VKD3DSIH_INVALID;
input_control_point_count = 1;
for (i = 0; i < instructions->count; ++i)
{
ins = &instructions->elements[i];
switch (ins->handler_idx)
{
case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT:
input_control_point_count = ins->declaration.count;
break;
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
*src_instructions = normaliser.instructions;
return VKD3D_OK;
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
/* ins may be relocated if the instruction array expands. */
location = ins->location;
ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature,
input_control_point_count, i, &location);
*src_instructions = normaliser.instructions;
return ret;
default:
break;
}
}
*src_instructions = normaliser.instructions;
return VKD3D_OK;
}
struct io_normaliser
{
struct vkd3d_shader_instruction_array instructions;
enum vkd3d_shader_type shader_type;
uint8_t major;
struct shader_signature *input_signature;
struct shader_signature *output_signature;
struct shader_signature *patch_constant_signature;
unsigned int instance_count;
unsigned int phase_body_idx;
enum vkd3d_shader_opcode phase;
unsigned int output_control_point_count;
struct vkd3d_shader_src_param *outpointid_param;
struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT];
struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT];
struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT];
uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE];
uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE];
uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE];
bool use_vocp;
};
static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *normaliser)
{
return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE;
}
static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser)
{
return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
}
static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature,
unsigned int reg_idx, unsigned int write_mask)
{
unsigned int i, base_write_mask;
for (i = 0; i < signature->element_count; ++i)
{
struct signature_element *e = &signature->elements[i];
if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx
&& (e->mask & write_mask) == write_mask)
{
return i;
}
}
/* Validated in the TPF reader, but failure in signature_element_range_expand_mask()
* can land us here on an unmatched vector mask. */
FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n",
reg_idx, write_mask);
base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask);
if (base_write_mask != write_mask)
return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask);
vkd3d_unreachable();
}
struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature,
unsigned int reg_idx, unsigned int write_mask)
{
return &signature->elements[shader_signature_find_element_for_reg(signature, reg_idx, write_mask)];
}
static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE],
unsigned int register_idx, uint32_t write_mask)
{
return range_map[register_idx][vsir_write_mask_get_component_idx(write_mask)];
}
static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx,
unsigned int register_count, uint32_t write_mask, bool is_dcl_indexrange)
{
unsigned int i, j, r, c, component_idx, component_count;
assert(write_mask <= VKD3DSP_WRITEMASK_ALL);
component_idx = vsir_write_mask_get_component_idx(write_mask);
component_count = vsir_write_mask_component_count(write_mask);
assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count);
if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange)
{
/* Validated in the TPF reader. */
assert(range_map[register_idx][component_idx] != UINT8_MAX);
return;
}
if (range_map[register_idx][component_idx] == register_count)
{
/* Already done. This happens when fxc splits a register declaration by
* component(s). The dcl_indexrange instructions are split too. */
return;
}
range_map[register_idx][component_idx] = register_count;
for (i = 0; i < register_count; ++i)
{
r = register_idx + i;
for (j = !i; j < component_count; ++j)
{
c = component_idx + j;
/* A synthetic patch constant range which overlaps an existing range can start upstream of it
* for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap.
* The latter is validated in the TPF reader. */
assert(!range_map[r][c] || !is_dcl_indexrange);
range_map[r][c] = UINT8_MAX;
}
}
}
static void io_normaliser_add_index_range(struct io_normaliser *normaliser,
const struct vkd3d_shader_instruction *ins)
{
const struct vkd3d_shader_index_range *range = &ins->declaration.index_range;
const struct vkd3d_shader_register *reg = &range->dst.reg;
unsigned int reg_idx, write_mask, element_idx;
const struct shader_signature *signature;
uint8_t (*range_map)[VKD3D_VEC4_SIZE];
switch (reg->type)
{
case VKD3DSPR_INPUT:
case VKD3DSPR_INCONTROLPOINT:
range_map = normaliser->input_range_map;
signature = normaliser->input_signature;
break;
case VKD3DSPR_OUTCONTROLPOINT:
range_map = normaliser->output_range_map;
signature = normaliser->output_signature;
break;
case VKD3DSPR_OUTPUT:
if (!io_normaliser_is_in_fork_or_join_phase(normaliser))
{
range_map = normaliser->output_range_map;
signature = normaliser->output_signature;
break;
}
/* fall through */
case VKD3DSPR_PATCHCONST:
range_map = normaliser->pc_range_map;
signature = normaliser->patch_constant_signature;
break;
default:
/* Validated in the TPF reader. */
vkd3d_unreachable();
}
reg_idx = reg->idx[reg->idx_count - 1].offset;
write_mask = range->dst.write_mask;
element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
range_map_set_register_range(range_map, reg_idx, range->register_count,
signature->elements[element_idx].mask, true);
}
static int signature_element_mask_compare(const void *a, const void *b)
{
const struct signature_element *e = a, *f = b;
int ret;
return (ret = vkd3d_u32_compare(e->mask, f->mask)) ? ret : vkd3d_u32_compare(e->register_index, f->register_index);
}
static bool sysval_semantics_should_merge(const struct signature_element *e, const struct signature_element *f)
{
if (e->sysval_semantic < VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE
|| e->sysval_semantic > VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN)
return false;
return e->sysval_semantic == f->sysval_semantic
/* Line detail and density must be merged together to match the SPIR-V array.
* This deletes one of the two sysvals, but these are not used. */
|| (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET
&& f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN)
|| (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN
&& f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET);
}
/* Merge tess factor sysvals because they are an array in SPIR-V. */
static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s,
uint8_t range_map[][VKD3D_VEC4_SIZE])
{
struct signature_element *e, *f;
unsigned int i, j, register_count;
qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare);
for (i = 0; i < s->element_count; i += register_count)
{
e = &s->elements[i];
register_count = 1;
if (!e->sysval_semantic)
continue;
for (j = i + 1; j < s->element_count; ++j, ++register_count)
{
f = &s->elements[j];
if (f->register_index != e->register_index + register_count || !sysval_semantics_should_merge(e, f))
break;
}
if (register_count < 2)
continue;
range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false);
}
}
static int signature_element_register_compare(const void *a, const void *b)
{
const struct signature_element *e = a, *f = b;
return vkd3d_u32_compare(e->register_index, f->register_index);
}
static int signature_element_index_compare(const void *a, const void *b)
{
const struct signature_element *e = a, *f = b;
return vkd3d_u32_compare(e->sort_index, f->sort_index);
}
static unsigned int signature_element_range_expand_mask(struct signature_element *e, unsigned int register_count,
uint8_t range_map[][VKD3D_VEC4_SIZE])
{
unsigned int i, j, component_idx, component_count, merged_write_mask = e->mask;
/* dcl_indexrange instructions can declare a subset of the full mask, and the masks of
* the elements within the range may differ. TPF's handling of arrayed inputs with
* dcl_indexrange is really just a hack. Here we create a mask which covers all element
* masks, and check for collisions with other ranges. */
for (i = 1; i < register_count; ++i)
merged_write_mask |= e[i].mask;
if (merged_write_mask == e->mask)
return merged_write_mask;
/* Reaching this point is very rare to begin with, and collisions are even rarer or
* impossible. If the latter shows up, the fallback in shader_signature_find_element_for_reg()
* may be sufficient. */
component_idx = vsir_write_mask_get_component_idx(e->mask);
component_count = vsir_write_mask_component_count(e->mask);
for (i = e->register_index; i < e->register_index + register_count; ++i)
{
for (j = 0; j < component_idx; ++j)
if (range_map[i][j])
break;
for (j = component_idx + component_count; j < VKD3D_VEC4_SIZE; ++j)
if (range_map[i][j])
break;
}
if (i == register_count)
{
WARN("Expanding mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask,
e->semantic_name, e->register_index, register_count);
return merged_write_mask;
}
WARN("Cannot expand mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask,
e->semantic_name, e->register_index, register_count);
return e->mask;
}
static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE],
bool is_patch_constant)
{
unsigned int i, j, element_count, new_count, register_count;
struct signature_element *elements;
struct signature_element *e, *f;
bool used;
element_count = s->element_count;
if (!(elements = vkd3d_malloc(element_count * sizeof(*elements))))
return false;
memcpy(elements, s->elements, element_count * sizeof(*elements));
qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare);
for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e)
{
e = &elements[i];
j = i + 1;
if (e->register_index == ~0u)
continue;
/* Do not merge if the register index will be relative-addressed. */
if (range_map_get_register_count(range_map, e->register_index, e->mask) > 1)
continue;
used = e->used_mask;
for (; j < element_count; ++j)
{
f = &elements[j];
/* Merge different components of the same register unless sysvals are different,
* or it will be relative-addressed. */
if (f->register_index != e->register_index || f->sysval_semantic != e->sysval_semantic
|| range_map_get_register_count(range_map, f->register_index, f->mask) > 1)
break;
TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name,
e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic);
assert(!(e->mask & f->mask));
e->mask |= f->mask;
e->used_mask |= f->used_mask;
e->semantic_index = min(e->semantic_index, f->semantic_index);
/* The first element may have no interpolation mode if it is unused. Elements which
* actually have different interpolation modes are assigned different registers. */
if (f->used_mask && !used)
{
if (e->interpolation_mode && e->interpolation_mode != f->interpolation_mode)
FIXME("Mismatching interpolation modes %u and %u.\n", e->interpolation_mode, f->interpolation_mode);
else
e->interpolation_mode = f->interpolation_mode;
}
}
}
element_count = new_count;
vkd3d_free(s->elements);
s->elements = elements;
s->element_count = element_count;
if (is_patch_constant)
shader_signature_map_patch_constant_index_ranges(s, range_map);
for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e)
{
e = &elements[i];
register_count = 1;
if (e->register_index >= MAX_REG_OUTPUT)
continue;
register_count = range_map_get_register_count(range_map, e->register_index, e->mask);
assert(register_count != UINT8_MAX);
register_count += !register_count;
if (register_count > 1)
{
TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count);
e->register_count = register_count;
e->mask = signature_element_range_expand_mask(e, register_count, range_map);
}
}
element_count = new_count;
/* Restoring the original order is required for sensible trace output. */
qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare);
s->element_count = element_count;
return true;
}
static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg,
unsigned int id_idx, unsigned int register_index)
{
assert(id_idx < ARRAY_SIZE(reg->idx) - 1);
/* For a relative-addressed register index, move the id up a slot to separate it from the address,
* because rel_addr can be replaced with a constant offset in some cases. */
if (reg->idx[id_idx].rel_addr)
{
reg->idx[id_idx + 1].rel_addr = NULL;
reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset;
reg->idx[id_idx].offset -= register_index;
if (id_idx)
{
/* idx[id_idx] now contains the array index, which must be moved below the control point id. */
struct vkd3d_shader_register_index tmp = reg->idx[id_idx];
reg->idx[id_idx] = reg->idx[id_idx - 1];
reg->idx[id_idx - 1] = tmp;
}
++id_idx;
}
/* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where
* tessellation level registers are merged into an array because they're an array in SPIR-V. */
else
{
++id_idx;
memmove(&reg->idx[1], &reg->idx[0], id_idx * sizeof(reg->idx[0]));
reg->idx[0].rel_addr = NULL;
reg->idx[0].offset = reg->idx[id_idx].offset - register_index;
}
return id_idx;
}
static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl,
struct io_normaliser *normaliser)
{
unsigned int id_idx, reg_idx, write_mask, element_idx;
struct vkd3d_shader_register *reg = &dst_param->reg;
struct vkd3d_shader_dst_param **dcl_params;
const struct shader_signature *signature;
const struct signature_element *e;
switch (reg->type)
{
case VKD3DSPR_OUTPUT:
reg_idx = reg->idx[reg->idx_count - 1].offset;
if (io_normaliser_is_in_fork_or_join_phase(normaliser))
{
signature = normaliser->patch_constant_signature;
/* Convert patch constant outputs to the patch constant register type to avoid the need
* to convert compiler symbols when accessed as inputs in a later stage. */
reg->type = VKD3DSPR_PATCHCONST;
dcl_params = normaliser->pc_dcl_params;
}
else
{
signature = normaliser->output_signature;
dcl_params = normaliser->output_dcl_params;
}
break;
case VKD3DSPR_PATCHCONST:
reg_idx = reg->idx[reg->idx_count - 1].offset;
signature = normaliser->patch_constant_signature;
dcl_params = normaliser->pc_dcl_params;
break;
case VKD3DSPR_COLOROUT:
reg_idx = reg->idx[0].offset;
signature = normaliser->output_signature;
reg->type = VKD3DSPR_OUTPUT;
dcl_params = normaliser->output_dcl_params;
break;
case VKD3DSPR_INCONTROLPOINT:
case VKD3DSPR_INPUT:
reg_idx = reg->idx[reg->idx_count - 1].offset;
signature = normaliser->input_signature;
reg->type = VKD3DSPR_INPUT;
dcl_params = normaliser->input_dcl_params;
break;
case VKD3DSPR_ATTROUT:
reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset;
signature = normaliser->output_signature;
reg->type = VKD3DSPR_OUTPUT;
dcl_params = normaliser->output_dcl_params;
break;
case VKD3DSPR_RASTOUT:
reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset;
signature = normaliser->output_signature;
reg->type = VKD3DSPR_OUTPUT;
dcl_params = normaliser->output_dcl_params;
break;
default:
return true;
}
id_idx = reg->idx_count - 1;
write_mask = dst_param->write_mask;
element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
e = &signature->elements[element_idx];
dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask);
if (is_io_dcl)
{
/* Validated in the TPF reader. */
assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params));
if (dcl_params[element_idx])
{
/* Merge split declarations into a single one. */
dcl_params[element_idx]->write_mask |= dst_param->write_mask;
/* Turn this into a nop. */
return false;
}
else
{
dcl_params[element_idx] = dst_param;
}
}
if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT)
{
if (is_io_dcl)
{
/* Emit an array size for the control points for consistency with inputs. */
reg->idx[0].offset = normaliser->output_control_point_count;
}
else
{
/* The control point id param. */
assert(reg->idx[0].rel_addr);
}
id_idx = 1;
}
if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic)))
{
if (is_io_dcl)
{
/* For control point I/O, idx 0 contains the control point count.
* Ensure it is moved up to the next slot. */
reg->idx[id_idx].offset = reg->idx[0].offset;
reg->idx[0].offset = e->register_count;
++id_idx;
}
else
{
id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index);
}
}
/* Replace the register index with the signature element index */
reg->idx[id_idx].offset = element_idx;
reg->idx_count = id_idx + 1;
return true;
}
static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_param,
struct io_normaliser *normaliser)
{
unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx;
struct vkd3d_shader_register *reg = &src_param->reg;
const struct shader_signature *signature;
const struct signature_element *e;
/* Input/output registers from one phase can be used as inputs in
* subsequent phases. Specifically:
*
* - Control phase inputs are available as "vicp" in fork and join
* phases.
* - Control phase outputs are available as "vocp" in fork and join
* phases.
* - Fork phase patch constants are available as "vpc" in join
* phases.
*
* We handle "vicp" here by converting INCONTROLPOINT src registers to
* type INPUT so they match the control phase declarations. We handle
* "vocp" by converting OUTCONTROLPOINT registers to type OUTPUT.
* Merging fork and join phases handles "vpc". */
switch (reg->type)
{
case VKD3DSPR_PATCHCONST:
reg_idx = reg->idx[reg->idx_count - 1].offset;
signature = normaliser->patch_constant_signature;
break;
case VKD3DSPR_INCONTROLPOINT:
reg->type = VKD3DSPR_INPUT;
/* fall through */
case VKD3DSPR_INPUT:
if (normaliser->major < 3 && normaliser->shader_type == VKD3D_SHADER_TYPE_PIXEL)
reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset;
else
reg_idx = reg->idx[reg->idx_count - 1].offset;
signature = normaliser->input_signature;
break;
case VKD3DSPR_OUTCONTROLPOINT:
reg->type = VKD3DSPR_OUTPUT;
/* fall through */
case VKD3DSPR_OUTPUT:
reg_idx = reg->idx[reg->idx_count - 1].offset;
signature = normaliser->output_signature;
break;
case VKD3DSPR_TEXTURE:
if (normaliser->shader_type != VKD3D_SHADER_TYPE_PIXEL)
return;
reg->type = VKD3DSPR_INPUT;
reg_idx = reg->idx[0].offset;
signature = normaliser->input_signature;
break;
default:
return;
}
id_idx = reg->idx_count - 1;
write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0);
element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
e = &signature->elements[element_idx];
if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic)))
id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index);
reg->idx[id_idx].offset = element_idx;
reg->idx_count = id_idx + 1;
if ((component_idx = vsir_write_mask_get_component_idx(e->mask)))
{
for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
if (vsir_swizzle_get_component(src_param->swizzle, i))
src_param->swizzle -= component_idx << VKD3D_SHADER_SWIZZLE_SHIFT(i);
}
}
static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins,
struct io_normaliser *normaliser)
{
struct vkd3d_shader_register *reg;
unsigned int i;
switch (ins->handler_idx)
{
case VKD3DSIH_DCL_INPUT:
if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL)
{
reg = &ins->declaration.dst.reg;
if (reg->type == VKD3DSPR_OUTCONTROLPOINT)
normaliser->use_vocp = true;
/* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their
* equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. */
if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST)
vkd3d_shader_instruction_make_nop(ins);
else if (reg->type == VKD3DSPR_INCONTROLPOINT)
reg->type = VKD3DSPR_INPUT;
}
/* fall through */
case VKD3DSIH_DCL_INPUT_PS:
case VKD3DSIH_DCL_OUTPUT:
if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser))
vkd3d_shader_instruction_make_nop(ins);
break;
case VKD3DSIH_DCL_INPUT_SGV:
case VKD3DSIH_DCL_INPUT_SIV:
case VKD3DSIH_DCL_INPUT_PS_SGV:
case VKD3DSIH_DCL_INPUT_PS_SIV:
case VKD3DSIH_DCL_OUTPUT_SIV:
if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser))
vkd3d_shader_instruction_make_nop(ins);
break;
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
normaliser->phase = ins->handler_idx;
memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params));
memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params));
memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params));
break;
default:
if (vsir_instruction_is_dcl(ins))
break;
for (i = 0; i < ins->dst_count; ++i)
shader_dst_param_io_normalise(&ins->dst[i], false, normaliser);
for (i = 0; i < ins->src_count; ++i)
shader_src_param_io_normalise(&ins->src[i], normaliser);
break;
}
}
static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser)
{
struct io_normaliser normaliser = {parser->program.instructions};
struct vsir_program *program = &parser->program;
struct vkd3d_shader_instruction *ins;
bool has_control_point_phase;
unsigned int i, j;
normaliser.phase = VKD3DSIH_INVALID;
normaliser.shader_type = program->shader_version.type;
normaliser.major = program->shader_version.major;
normaliser.input_signature = &parser->shader_desc.input_signature;
normaliser.output_signature = &parser->shader_desc.output_signature;
normaliser.patch_constant_signature = &parser->shader_desc.patch_constant_signature;
for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i)
{
ins = &program->instructions.elements[i];
switch (ins->handler_idx)
{
case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT:
normaliser.output_control_point_count = ins->declaration.count;
break;
case VKD3DSIH_DCL_INDEX_RANGE:
io_normaliser_add_index_range(&normaliser, ins);
vkd3d_shader_instruction_make_nop(ins);
break;
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
has_control_point_phase = true;
/* fall through */
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
normaliser.phase = ins->handler_idx;
break;
default:
break;
}
}
if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase)
{
/* Inputs and outputs must match for the default phase, so merge ranges must match too. */
for (i = 0; i < MAX_REG_OUTPUT; ++i)
{
for (j = 0; j < VKD3D_VEC4_SIZE; ++j)
{
if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j])
normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j];
else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j])
normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j];
else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]);
}
}
}
if (!shader_signature_merge(&parser->shader_desc.input_signature, normaliser.input_range_map, false)
|| !shader_signature_merge(&parser->shader_desc.output_signature, normaliser.output_range_map, false)
|| !shader_signature_merge(&parser->shader_desc.patch_constant_signature, normaliser.pc_range_map, true))
{
program->instructions = normaliser.instructions;
return VKD3D_ERROR_OUT_OF_MEMORY;
}
normaliser.phase = VKD3DSIH_INVALID;
for (i = 0; i < normaliser.instructions.count; ++i)
shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser);
program->instructions = normaliser.instructions;
program->use_vocp = normaliser.use_vocp;
return VKD3D_OK;
}
struct flat_constant_def
{
enum vkd3d_shader_d3dbc_constant_register set;
uint32_t index;
uint32_t value[4];
};
struct flat_constants_normaliser
{
struct flat_constant_def *defs;
size_t def_count, defs_capacity;
};
static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg,
enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index)
{
static const struct
{
enum vkd3d_shader_register_type type;
enum vkd3d_shader_d3dbc_constant_register set;
uint32_t offset;
}
regs[] =
{
{VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0},
{VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048},
{VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096},
{VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144},
{VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0},
{VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0},
};
unsigned int i;
for (i = 0; i < ARRAY_SIZE(regs); ++i)
{
if (reg->type == regs[i].type)
{
if (reg->idx[0].rel_addr)
{
FIXME("Unhandled relative address.\n");
return false;
}
*set = regs[i].set;
*index = regs[i].offset + reg->idx[0].offset;
return true;
}
}
return false;
}
static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param,
const struct flat_constants_normaliser *normaliser)
{
enum vkd3d_shader_d3dbc_constant_register set;
uint32_t index;
size_t i, j;
if (!get_flat_constant_register_type(&param->reg, &set, &index))
return;
for (i = 0; i < normaliser->def_count; ++i)
{
if (normaliser->defs[i].set == set && normaliser->defs[i].index == index)
{
param->reg.type = VKD3DSPR_IMMCONST;
param->reg.idx_count = 0;
param->reg.dimension = VSIR_DIMENSION_VEC4;
for (j = 0; j < 4; ++j)
param->reg.u.immconst_u32[j] = normaliser->defs[i].value[j];
return;
}
}
param->reg.type = VKD3DSPR_CONSTBUFFER;
param->reg.idx[0].offset = set; /* register ID */
param->reg.idx[1].offset = set; /* register index */
param->reg.idx[2].offset = index; /* buffer index */
param->reg.idx_count = 3;
}
static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_program *program)
{
struct flat_constants_normaliser normaliser = {0};
unsigned int i, j;
for (i = 0; i < program->instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB)
{
struct flat_constant_def *def;
if (!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity,
normaliser.def_count + 1, sizeof(*normaliser.defs)))
{
vkd3d_free(normaliser.defs);
return VKD3D_ERROR_OUT_OF_MEMORY;
}
def = &normaliser.defs[normaliser.def_count++];
get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index);
for (j = 0; j < 4; ++j)
def->value[j] = ins->src[0].reg.u.immconst_u32[j];
vkd3d_shader_instruction_make_nop(ins);
}
else
{
for (j = 0; j < ins->src_count; ++j)
shader_register_normalise_flat_constants(&ins->src[j], &normaliser);
}
}
vkd3d_free(normaliser.defs);
return VKD3D_OK;
}
static void remove_dead_code(struct vsir_program *program)
{
size_t i, depth = 0;
bool dead = false;
for (i = 0; i < program->instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
switch (ins->handler_idx)
{
case VKD3DSIH_IF:
case VKD3DSIH_LOOP:
case VKD3DSIH_SWITCH:
if (dead)
{
vkd3d_shader_instruction_make_nop(ins);
++depth;
}
break;
case VKD3DSIH_ENDIF:
case VKD3DSIH_ENDLOOP:
case VKD3DSIH_ENDSWITCH:
case VKD3DSIH_ELSE:
if (dead)
{
if (depth > 0)
{
if (ins->handler_idx != VKD3DSIH_ELSE)
--depth;
vkd3d_shader_instruction_make_nop(ins);
}
else
{
dead = false;
}
}
break;
/* `depth' is counted with respect to where the dead code
* segment began. So it starts at zero and it signals the
* termination of the dead code segment when it would
* become negative. */
case VKD3DSIH_BREAK:
case VKD3DSIH_RET:
case VKD3DSIH_CONTINUE:
if (dead)
{
vkd3d_shader_instruction_make_nop(ins);
}
else
{
dead = true;
depth = 0;
}
break;
/* If `case' or `default' appears at zero depth, it means
* that they are a possible target for the corresponding
* switch, so the code is live again. */
case VKD3DSIH_CASE:
case VKD3DSIH_DEFAULT:
if (dead)
{
if (depth == 0)
dead = false;
else
vkd3d_shader_instruction_make_nop(ins);
}
break;
/* Phase instructions can only appear in hull shaders and
* outside of any block. When a phase returns, control is
* moved to the following phase, so they make code live
* again. */
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
dead = false;
break;
default:
if (dead)
vkd3d_shader_instruction_make_nop(ins);
break;
}
}
}
static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser)
{
unsigned int i;
for (i = 0; i < parser->program.instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i];
struct vkd3d_shader_src_param *srcs;
switch (ins->handler_idx)
{
case VKD3DSIH_TEX:
if (!(srcs = shader_src_param_allocator_get(&parser->program.instructions.src_params, 3)))
return VKD3D_ERROR_OUT_OF_MEMORY;
memset(srcs, 0, sizeof(*srcs) * 3);
ins->handler_idx = VKD3DSIH_SAMPLE;
srcs[0] = ins->src[0];
srcs[1].reg.type = VKD3DSPR_RESOURCE;
srcs[1].reg.idx[0] = ins->src[1].reg.idx[0];
srcs[1].reg.idx[1] = ins->src[1].reg.idx[0];
srcs[1].reg.idx_count = 2;
srcs[1].reg.data_type = VKD3D_DATA_RESOURCE;
srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
srcs[2].reg.type = VKD3DSPR_SAMPLER;
srcs[2].reg.idx[0] = ins->src[1].reg.idx[0];
srcs[2].reg.idx[1] = ins->src[1].reg.idx[0];
srcs[2].reg.idx_count = 2;
srcs[2].reg.data_type = VKD3D_DATA_SAMPLER;
ins->src = srcs;
ins->src_count = 3;
break;
case VKD3DSIH_TEXBEM:
case VKD3DSIH_TEXBEML:
case VKD3DSIH_TEXCOORD:
case VKD3DSIH_TEXDEPTH:
case VKD3DSIH_TEXDP3:
case VKD3DSIH_TEXDP3TEX:
case VKD3DSIH_TEXLDD:
case VKD3DSIH_TEXLDL:
case VKD3DSIH_TEXM3x2PAD:
case VKD3DSIH_TEXM3x2TEX:
case VKD3DSIH_TEXM3x3DIFF:
case VKD3DSIH_TEXM3x3PAD:
case VKD3DSIH_TEXM3x3SPEC:
case VKD3DSIH_TEXM3x3TEX:
case VKD3DSIH_TEXM3x3VSPEC:
case VKD3DSIH_TEXREG2AR:
case VKD3DSIH_TEXREG2GB:
case VKD3DSIH_TEXREG2RGB:
vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
"Aborting due to not yet implemented feature: "
"Combined sampler instruction %#x.", ins->handler_idx);
return VKD3D_ERROR_NOT_IMPLEMENTED;
default:
break;
}
}
return VKD3D_OK;
}
struct cf_flattener_if_info
{
struct vkd3d_shader_src_param *false_param;
unsigned int id;
uint32_t merge_block_id;
unsigned int else_block_id;
};
struct cf_flattener_loop_info
{
unsigned int header_block_id;
unsigned int continue_block_id;
uint32_t merge_block_id;
};
struct cf_flattener_switch_case
{
unsigned int value;
unsigned int block_id;
};
struct cf_flattener_switch_info
{
size_t ins_location;
const struct vkd3d_shader_src_param *condition;
unsigned int id;
unsigned int merge_block_id;
unsigned int default_block_id;
struct cf_flattener_switch_case *cases;
size_t cases_size;
unsigned int cases_count;
};
struct cf_flattener_info
{
union
{
struct cf_flattener_if_info if_;
struct cf_flattener_loop_info loop;
struct cf_flattener_switch_info switch_;
} u;
enum
{
VKD3D_BLOCK_IF,
VKD3D_BLOCK_LOOP,
VKD3D_BLOCK_SWITCH,
} current_block;
bool inside_block;
};
struct cf_flattener
{
struct vkd3d_shader_parser *parser;
struct vkd3d_shader_location location;
bool allocation_failed;
struct vkd3d_shader_instruction *instructions;
size_t instruction_capacity;
size_t instruction_count;
unsigned int block_id;
const char **block_names;
size_t block_name_capacity;
size_t block_name_count;
unsigned int branch_id;
unsigned int loop_id;
unsigned int switch_id;
unsigned int control_flow_depth;
struct cf_flattener_info *control_flow_info;
size_t control_flow_info_size;
};
static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count)
{
if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity,
flattener->instruction_count + count, sizeof(*flattener->instructions)))
{
ERR("Failed to allocate instructions.\n");
flattener->allocation_failed = true;
return NULL;
}
return &flattener->instructions[flattener->instruction_count];
}
static bool cf_flattener_copy_instruction(struct cf_flattener *flattener,
const struct vkd3d_shader_instruction *instruction)
{
struct vkd3d_shader_instruction *dst_ins;
if (instruction->handler_idx == VKD3DSIH_NOP)
return true;
if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
return false;
*dst_ins = *instruction;
++flattener->instruction_count;
return true;
}
static unsigned int cf_flattener_alloc_block_id(struct cf_flattener *flattener)
{
return ++flattener->block_id;
}
static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_shader_instruction *ins,
unsigned int count, struct cf_flattener *flattener)
{
struct vkd3d_shader_src_param *params;
if (!(params = vsir_program_get_src_params(&flattener->parser->program, count)))
{
flattener->allocation_failed = true;
return NULL;
}
ins->src = params;
ins->src_count = count;
return params;
}
static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int label_id)
{
struct vkd3d_shader_instruction *ins;
if (!(ins = cf_flattener_require_space(flattener, 1)))
return;
if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program))
++flattener->instruction_count;
else
flattener->allocation_failed = true;
}
/* For conditional branches, this returns the false target branch parameter. */
static struct vkd3d_shader_src_param *cf_flattener_emit_branch(struct cf_flattener *flattener,
unsigned int merge_block_id, unsigned int continue_block_id,
const struct vkd3d_shader_src_param *condition, unsigned int true_id, unsigned int false_id,
unsigned int flags)
{
struct vkd3d_shader_src_param *src_params, *false_branch_param;
struct vkd3d_shader_instruction *ins;
if (!(ins = cf_flattener_require_space(flattener, 1)))
return NULL;
vsir_instruction_init(ins, &flattener->location, VKD3DSIH_BRANCH);
if (condition)
{
if (!(src_params = instruction_src_params_alloc(ins, 4 + !!continue_block_id, flattener)))
return NULL;
src_params[0] = *condition;
if (flags == VKD3D_SHADER_CONDITIONAL_OP_Z)
{
vsir_src_param_init_label(&src_params[1], false_id);
vsir_src_param_init_label(&src_params[2], true_id);
false_branch_param = &src_params[1];
}
else
{
vsir_src_param_init_label(&src_params[1], true_id);
vsir_src_param_init_label(&src_params[2], false_id);
false_branch_param = &src_params[2];
}
vsir_src_param_init_label(&src_params[3], merge_block_id);
if (continue_block_id)
vsir_src_param_init_label(&src_params[4], continue_block_id);
}
else
{
if (!(src_params = instruction_src_params_alloc(ins, merge_block_id ? 3 : 1, flattener)))
return NULL;
vsir_src_param_init_label(&src_params[0], true_id);
if (merge_block_id)
{
/* An unconditional branch may only have merge information for a loop, which
* must have both a merge block and continue block. */
vsir_src_param_init_label(&src_params[1], merge_block_id);
vsir_src_param_init_label(&src_params[2], continue_block_id);
}
false_branch_param = NULL;
}
++flattener->instruction_count;
return false_branch_param;
}
static void cf_flattener_emit_conditional_branch_and_merge(struct cf_flattener *flattener,
const struct vkd3d_shader_src_param *condition, unsigned int true_id, unsigned int flags)
{
unsigned int merge_block_id;
merge_block_id = cf_flattener_alloc_block_id(flattener);
cf_flattener_emit_branch(flattener, merge_block_id, 0, condition, true_id, merge_block_id, flags);
cf_flattener_emit_label(flattener, merge_block_id);
}
static void cf_flattener_emit_unconditional_branch(struct cf_flattener *flattener, unsigned int target_block_id)
{
cf_flattener_emit_branch(flattener, 0, 0, NULL, target_block_id, 0, 0);
}
static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_flattener *flattener)
{
if (!vkd3d_array_reserve((void **)&flattener->control_flow_info, &flattener->control_flow_info_size,
flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info)))
{
ERR("Failed to allocate control flow info structure.\n");
flattener->allocation_failed = true;
return NULL;
}
return &flattener->control_flow_info[flattener->control_flow_depth++];
}
static void cf_flattener_pop_control_flow_level(struct cf_flattener *flattener)
{
struct cf_flattener_info *cf_info;
cf_info = &flattener->control_flow_info[--flattener->control_flow_depth];
memset(cf_info, 0, sizeof(*cf_info));
}
static struct cf_flattener_info *cf_flattener_find_innermost_loop(struct cf_flattener *flattener)
{
int depth;
for (depth = flattener->control_flow_depth - 1; depth >= 0; --depth)
{
if (flattener->control_flow_info[depth].current_block == VKD3D_BLOCK_LOOP)
return &flattener->control_flow_info[depth];
}
return NULL;
}
static struct cf_flattener_info *cf_flattener_find_innermost_breakable_cf_construct(struct cf_flattener *flattener)
{
int depth;
for (depth = flattener->control_flow_depth - 1; depth >= 0; --depth)
{
if (flattener->control_flow_info[depth].current_block == VKD3D_BLOCK_LOOP
|| flattener->control_flow_info[depth].current_block == VKD3D_BLOCK_SWITCH)
return &flattener->control_flow_info[depth];
}
return NULL;
}
static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_flattener *flattener,
unsigned int block_id, const char *fmt, ...)
{
struct vkd3d_string_buffer buffer;
size_t block_name_count;
va_list args;
--block_id;
block_name_count = max(flattener->block_name_count, block_id + 1);
if (!vkd3d_array_reserve((void **)&flattener->block_names, &flattener->block_name_capacity,
block_name_count, sizeof(*flattener->block_names)))
return;
memset(&flattener->block_names[flattener->block_name_count], 0,
(block_name_count - flattener->block_name_count) * sizeof(*flattener->block_names));
flattener->block_name_count = block_name_count;
vkd3d_string_buffer_init(&buffer);
va_start(args, fmt);
vkd3d_string_buffer_vprintf(&buffer, fmt, args);
va_end(args);
flattener->block_names[block_id] = buffer.buffer;
}
static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener)
{
bool main_block_open, is_hull_shader, after_declarations_section;
struct vkd3d_shader_parser *parser = flattener->parser;
struct vkd3d_shader_instruction_array *instructions;
struct vsir_program *program = &parser->program;
struct vkd3d_shader_instruction *dst_ins;
size_t i;
instructions = &program->instructions;
is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL;
main_block_open = !is_hull_shader;
after_declarations_section = is_hull_shader;
if (!cf_flattener_require_space(flattener, instructions->count + 1))
return VKD3D_ERROR_OUT_OF_MEMORY;
for (i = 0; i < instructions->count; ++i)
{
unsigned int loop_header_block_id, loop_body_block_id, continue_block_id, merge_block_id, true_block_id;
const struct vkd3d_shader_instruction *instruction = &instructions->elements[i];
const struct vkd3d_shader_src_param *src = instruction->src;
struct cf_flattener_info *cf_info;
flattener->location = instruction->location;
/* Declarations should occur before the first code block, which in hull shaders is marked by the first
* phase instruction, and in all other shader types begins with the first label instruction. */
if (!after_declarations_section && !vsir_instruction_is_dcl(instruction)
&& instruction->handler_idx != VKD3DSIH_NOP)
{
after_declarations_section = true;
cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener));
}
cf_info = flattener->control_flow_depth
? &flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL;
switch (instruction->handler_idx)
{
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
if (!cf_flattener_copy_instruction(flattener, instruction))
return VKD3D_ERROR_OUT_OF_MEMORY;
if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags)
after_declarations_section = false;
break;
case VKD3DSIH_LABEL:
vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
"Aborting due to not yet implemented feature: Label instruction.");
return VKD3D_ERROR_NOT_IMPLEMENTED;
case VKD3DSIH_IF:
if (!(cf_info = cf_flattener_push_control_flow_level(flattener)))
return VKD3D_ERROR_OUT_OF_MEMORY;
true_block_id = cf_flattener_alloc_block_id(flattener);
merge_block_id = cf_flattener_alloc_block_id(flattener);
cf_info->u.if_.false_param = cf_flattener_emit_branch(flattener, merge_block_id, 0,
src, true_block_id, merge_block_id, instruction->flags);
if (!cf_info->u.if_.false_param)
return VKD3D_ERROR_OUT_OF_MEMORY;
cf_flattener_emit_label(flattener, true_block_id);
cf_info->u.if_.id = flattener->branch_id;
cf_info->u.if_.merge_block_id = merge_block_id;
cf_info->u.if_.else_block_id = 0;
cf_info->inside_block = true;
cf_info->current_block = VKD3D_BLOCK_IF;
cf_flattener_create_block_name(flattener, merge_block_id, "branch%u_merge", flattener->branch_id);
cf_flattener_create_block_name(flattener, true_block_id, "branch%u_true", flattener->branch_id);
++flattener->branch_id;
break;
case VKD3DSIH_ELSE:
if (cf_info->inside_block)
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.if_.merge_block_id);
cf_info->u.if_.else_block_id = cf_flattener_alloc_block_id(flattener);
cf_info->u.if_.false_param->reg.idx[0].offset = cf_info->u.if_.else_block_id;
cf_flattener_create_block_name(flattener,
cf_info->u.if_.else_block_id, "branch%u_false", cf_info->u.if_.id);
cf_flattener_emit_label(flattener, cf_info->u.if_.else_block_id);
cf_info->inside_block = true;
break;
case VKD3DSIH_ENDIF:
if (cf_info->inside_block)
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.if_.merge_block_id);
cf_flattener_emit_label(flattener, cf_info->u.if_.merge_block_id);
cf_flattener_pop_control_flow_level(flattener);
break;
case VKD3DSIH_LOOP:
if (!(cf_info = cf_flattener_push_control_flow_level(flattener)))
return VKD3D_ERROR_OUT_OF_MEMORY;
loop_header_block_id = cf_flattener_alloc_block_id(flattener);
loop_body_block_id = cf_flattener_alloc_block_id(flattener);
continue_block_id = cf_flattener_alloc_block_id(flattener);
merge_block_id = cf_flattener_alloc_block_id(flattener);
cf_flattener_emit_unconditional_branch(flattener, loop_header_block_id);
cf_flattener_emit_label(flattener, loop_header_block_id);
cf_flattener_emit_branch(flattener, merge_block_id, continue_block_id,
NULL, loop_body_block_id, 0, 0);
cf_flattener_emit_label(flattener, loop_body_block_id);
cf_info->u.loop.header_block_id = loop_header_block_id;
cf_info->u.loop.continue_block_id = continue_block_id;
cf_info->u.loop.merge_block_id = merge_block_id;
cf_info->current_block = VKD3D_BLOCK_LOOP;
cf_info->inside_block = true;
cf_flattener_create_block_name(flattener, loop_header_block_id, "loop%u_header", flattener->loop_id);
cf_flattener_create_block_name(flattener, loop_body_block_id, "loop%u_body", flattener->loop_id);
cf_flattener_create_block_name(flattener, continue_block_id, "loop%u_continue", flattener->loop_id);
cf_flattener_create_block_name(flattener, merge_block_id, "loop%u_merge", flattener->loop_id);
++flattener->loop_id;
break;
case VKD3DSIH_ENDLOOP:
if (cf_info->inside_block)
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.loop.continue_block_id);
cf_flattener_emit_label(flattener, cf_info->u.loop.continue_block_id);
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.loop.header_block_id);
cf_flattener_emit_label(flattener, cf_info->u.loop.merge_block_id);
cf_flattener_pop_control_flow_level(flattener);
break;
case VKD3DSIH_SWITCH:
if (!(cf_info = cf_flattener_push_control_flow_level(flattener)))
return VKD3D_ERROR_OUT_OF_MEMORY;
merge_block_id = cf_flattener_alloc_block_id(flattener);
cf_info->u.switch_.ins_location = flattener->instruction_count;
cf_info->u.switch_.condition = src;
if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_instruction_init(dst_ins, &instruction->location, VKD3DSIH_SWITCH_MONOLITHIC);
++flattener->instruction_count;
cf_info->u.switch_.id = flattener->switch_id;
cf_info->u.switch_.merge_block_id = merge_block_id;
cf_info->u.switch_.cases = NULL;
cf_info->u.switch_.cases_size = 0;
cf_info->u.switch_.cases_count = 0;
cf_info->u.switch_.default_block_id = 0;
cf_info->inside_block = false;
cf_info->current_block = VKD3D_BLOCK_SWITCH;
cf_flattener_create_block_name(flattener, merge_block_id, "switch%u_merge", flattener->switch_id);
++flattener->switch_id;
if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.cases, &cf_info->u.switch_.cases_size,
10, sizeof(*cf_info->u.switch_.cases)))
return VKD3D_ERROR_OUT_OF_MEMORY;
break;
case VKD3DSIH_ENDSWITCH:
{
struct vkd3d_shader_src_param *src_params;
unsigned int j;
if (!cf_info->u.switch_.default_block_id)
cf_info->u.switch_.default_block_id = cf_info->u.switch_.merge_block_id;
cf_flattener_emit_label(flattener, cf_info->u.switch_.merge_block_id);
/* The SWITCH instruction is completed when the endswitch
* instruction is processed because we do not know the number
* of case statements or the default block id in advance.*/
dst_ins = &flattener->instructions[cf_info->u.switch_.ins_location];
if (!(src_params = instruction_src_params_alloc(dst_ins, cf_info->u.switch_.cases_count * 2 + 3, flattener)))
{
vkd3d_free(cf_info->u.switch_.cases);
return VKD3D_ERROR_OUT_OF_MEMORY;
}
src_params[0] = *cf_info->u.switch_.condition;
vsir_src_param_init_label(&src_params[1], cf_info->u.switch_.default_block_id);
vsir_src_param_init_label(&src_params[2], cf_info->u.switch_.merge_block_id);
for (j = 0; j < cf_info->u.switch_.cases_count; ++j)
{
unsigned int index = j * 2 + 3;
vsir_src_param_init(&src_params[index], VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
src_params[index].reg.u.immconst_u32[0] = cf_info->u.switch_.cases[j].value;
vsir_src_param_init_label(&src_params[index + 1], cf_info->u.switch_.cases[j].block_id);
}
vkd3d_free(cf_info->u.switch_.cases);
cf_flattener_pop_control_flow_level(flattener);
break;
}
case VKD3DSIH_CASE:
{
unsigned int label_id, value;
if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X))
{
WARN("Unexpected src swizzle %#x.\n", src->swizzle);
vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE,
"The swizzle for a switch case value is not scalar X.");
}
value = *src->reg.u.immconst_u32;
if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.cases, &cf_info->u.switch_.cases_size,
cf_info->u.switch_.cases_count + 1, sizeof(*cf_info->u.switch_.cases)))
return VKD3D_ERROR_OUT_OF_MEMORY;
label_id = cf_flattener_alloc_block_id(flattener);
if (cf_info->inside_block) /* fall-through */
cf_flattener_emit_unconditional_branch(flattener, label_id);
cf_info->u.switch_.cases[cf_info->u.switch_.cases_count].value = value;
cf_info->u.switch_.cases[cf_info->u.switch_.cases_count].block_id = label_id;
++cf_info->u.switch_.cases_count;
cf_flattener_emit_label(flattener, label_id);
cf_flattener_create_block_name(flattener, label_id, "switch%u_case%u", cf_info->u.switch_.id, value);
cf_info->inside_block = true;
break;
}
case VKD3DSIH_DEFAULT:
cf_info->u.switch_.default_block_id = cf_flattener_alloc_block_id(flattener);
if (cf_info->inside_block) /* fall-through */
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.switch_.default_block_id);
cf_flattener_emit_label(flattener, cf_info->u.switch_.default_block_id);
cf_flattener_create_block_name(flattener, cf_info->u.switch_.default_block_id,
"switch%u_default", cf_info->u.switch_.id);
cf_info->inside_block = true;
break;
case VKD3DSIH_BREAK:
{
struct cf_flattener_info *breakable_cf_info;
if (!(breakable_cf_info = cf_flattener_find_innermost_breakable_cf_construct(flattener)))
{
FIXME("Unhandled break instruction.\n");
return VKD3D_ERROR_INVALID_SHADER;
}
if (breakable_cf_info->current_block == VKD3D_BLOCK_LOOP)
{
cf_flattener_emit_unconditional_branch(flattener, breakable_cf_info->u.loop.merge_block_id);
}
else if (breakable_cf_info->current_block == VKD3D_BLOCK_SWITCH)
{
cf_flattener_emit_unconditional_branch(flattener, breakable_cf_info->u.switch_.merge_block_id);
}
cf_info->inside_block = false;
break;
}
case VKD3DSIH_BREAKP:
{
struct cf_flattener_info *loop_cf_info;
if (!(loop_cf_info = cf_flattener_find_innermost_loop(flattener)))
{
ERR("Invalid 'breakc' instruction outside loop.\n");
return VKD3D_ERROR_INVALID_SHADER;
}
cf_flattener_emit_conditional_branch_and_merge(flattener,
src, loop_cf_info->u.loop.merge_block_id, instruction->flags);
break;
}
case VKD3DSIH_CONTINUE:
{
struct cf_flattener_info *loop_cf_info;
if (!(loop_cf_info = cf_flattener_find_innermost_loop(flattener)))
{
ERR("Invalid 'continue' instruction outside loop.\n");
return VKD3D_ERROR_INVALID_SHADER;
}
cf_flattener_emit_unconditional_branch(flattener, loop_cf_info->u.loop.continue_block_id);
cf_info->inside_block = false;
break;
}
case VKD3DSIH_CONTINUEP:
{
struct cf_flattener_info *loop_cf_info;
if (!(loop_cf_info = cf_flattener_find_innermost_loop(flattener)))
{
ERR("Invalid 'continuec' instruction outside loop.\n");
return VKD3D_ERROR_INVALID_SHADER;
}
cf_flattener_emit_conditional_branch_and_merge(flattener,
src, loop_cf_info->u.loop.continue_block_id, instruction->flags);
break;
}
case VKD3DSIH_RET:
if (!cf_flattener_copy_instruction(flattener, instruction))
return VKD3D_ERROR_OUT_OF_MEMORY;
if (cf_info)
cf_info->inside_block = false;
else
main_block_open = false;
break;
default:
if (!cf_flattener_copy_instruction(flattener, instruction))
return VKD3D_ERROR_OUT_OF_MEMORY;
break;
}
}
if (main_block_open)
{
if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
return VKD3D_ERROR_OUT_OF_MEMORY;
vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET);
++flattener->instruction_count;
}
return flattener->allocation_failed ? VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK;
}
static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser)
{
struct vsir_program *program = &parser->program;
struct cf_flattener flattener = {0};
enum vkd3d_result result;
flattener.parser = parser;
result = cf_flattener_iterate_instruction_array(&flattener);
if (result >= 0)
{
vkd3d_free(parser->program.instructions.elements);
program->instructions.elements = flattener.instructions;
program->instructions.capacity = flattener.instruction_capacity;
program->instructions.count = flattener.instruction_count;
program->block_count = flattener.block_id;
}
else
{
vkd3d_free(flattener.instructions);
}
vkd3d_free(flattener.control_flow_info);
/* Simpler to always free these in vsir_program_cleanup(). */
program->block_names = flattener.block_names;
program->block_name_count = flattener.block_name_count;
return result;
}
static unsigned int label_from_src_param(const struct vkd3d_shader_src_param *param)
{
assert(param->reg.type == VKD3DSPR_LABEL);
return param->reg.idx[0].offset;
}
static bool reserve_instructions(struct vkd3d_shader_instruction **instructions, size_t *capacity, size_t count)
{
if (!vkd3d_array_reserve((void **)instructions, capacity, count, sizeof(**instructions)))
{
ERR("Failed to allocate instructions.\n");
return false;
}
return true;
}
/* A record represents replacing a jump from block `switch_label' to
* block `target_label' with a jump from block `if_label' to block
* `target_label'. */
struct lower_switch_to_if_ladder_block_mapping
{
unsigned int switch_label;
unsigned int if_label;
unsigned int target_label;
};
static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_if_ladder_block_mapping **block_map,
size_t *map_capacity, size_t *map_count, unsigned int switch_label, unsigned int if_label, unsigned int target_label)
{
if (!vkd3d_array_reserve((void **)block_map, map_capacity, *map_count + 1, sizeof(**block_map)))
{
ERR("Failed to allocate block mapping.\n");
return false;
}
(*block_map)[*map_count].switch_label = switch_label;
(*block_map)[*map_count].if_label = if_label;
(*block_map)[*map_count].target_label = target_label;
*map_count += 1;
return true;
}
static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program)
{
unsigned int block_count = program->block_count, ssa_count = program->ssa_count, current_label = 0, if_label;
size_t ins_capacity = 0, ins_count = 0, i, map_capacity = 0, map_count = 0;
struct vkd3d_shader_instruction *instructions = NULL;
struct lower_switch_to_if_ladder_block_mapping *block_map = NULL;
if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count))
goto fail;
/* First subpass: convert SWITCH_MONOLITHIC instructions to
* selection ladders, keeping a map between blocks before and
* after the subpass. */
for (i = 0; i < program->instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
unsigned int case_count, j, default_label;
switch (ins->handler_idx)
{
case VKD3DSIH_LABEL:
current_label = label_from_src_param(&ins->src[0]);
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
goto fail;
instructions[ins_count++] = *ins;
continue;
case VKD3DSIH_SWITCH_MONOLITHIC:
break;
default:
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
goto fail;
instructions[ins_count++] = *ins;
continue;
}
case_count = (ins->src_count - 3) / 2;
default_label = label_from_src_param(&ins->src[1]);
/* In principle we can have a switch with no cases, and we
* just have to jump to the default label. */
if (case_count == 0)
{
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
goto fail;
if (!vsir_instruction_init_with_params(program, &instructions[ins_count],
&ins->location, VKD3DSIH_BRANCH, 0, 1))
goto fail;
vsir_src_param_init_label(&instructions[ins_count].src[0], default_label);
++ins_count;
}
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3 * case_count - 1))
goto fail;
if_label = current_label;
for (j = 0; j < case_count; ++j)
{
unsigned int fallthrough_label, case_label = label_from_src_param(&ins->src[3 + 2 * j + 1]);
if (!vsir_instruction_init_with_params(program,
&instructions[ins_count], &ins->location, VKD3DSIH_IEQ, 1, 2))
goto fail;
dst_param_init_ssa_bool(&instructions[ins_count].dst[0], ssa_count);
instructions[ins_count].src[0] = ins->src[0];
instructions[ins_count].src[1] = ins->src[3 + 2 * j];
++ins_count;
/* For all cases except the last one we fall through to
* the following case; the last one has to jump to the
* default label. */
if (j == case_count - 1)
fallthrough_label = default_label;
else
fallthrough_label = block_count + 1;
if (!vsir_instruction_init_with_params(program, &instructions[ins_count],
&ins->location, VKD3DSIH_BRANCH, 0, 3))
goto fail;
src_param_init_ssa_bool(&instructions[ins_count].src[0], ssa_count);
vsir_src_param_init_label(&instructions[ins_count].src[1], case_label);
vsir_src_param_init_label(&instructions[ins_count].src[2], fallthrough_label);
++ins_count;
++ssa_count;
if (!lower_switch_to_if_ladder_add_block_mapping(&block_map, &map_capacity, &map_count,
current_label, if_label, case_label))
goto fail;
if (j == case_count - 1)
{
if (!lower_switch_to_if_ladder_add_block_mapping(&block_map, &map_capacity, &map_count,
current_label, if_label, default_label))
goto fail;
}
else
{
if (!vsir_instruction_init_with_params(program,
&instructions[ins_count], &ins->location, VKD3DSIH_LABEL, 0, 1))
goto fail;
vsir_src_param_init_label(&instructions[ins_count].src[0], ++block_count);
++ins_count;
if_label = block_count;
}
}
}
/* Second subpass: creating new blocks might have broken
* references in PHI instructions, so we use the block map to fix
* them. */
current_label = 0;
for (i = 0; i < ins_count; ++i)
{
struct vkd3d_shader_instruction *ins = &instructions[i];
struct vkd3d_shader_src_param *new_src;
unsigned int j, l, new_src_count = 0;
switch (ins->handler_idx)
{
case VKD3DSIH_LABEL:
current_label = label_from_src_param(&ins->src[0]);
continue;
case VKD3DSIH_PHI:
break;
default:
continue;
}
/* First count how many source parameters we need. */
for (j = 0; j < ins->src_count; j += 2)
{
unsigned int source_label = label_from_src_param(&ins->src[j + 1]);
size_t k, match_count = 0;
for (k = 0; k < map_count; ++k)
{
struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k];
if (mapping->switch_label == source_label && mapping->target_label == current_label)
match_count += 1;
}
new_src_count += (match_count != 0) ? 2 * match_count : 2;
}
assert(new_src_count >= ins->src_count);
/* Allocate more source parameters if needed. */
if (new_src_count == ins->src_count)
{
new_src = ins->src;
}
else
{
if (!(new_src = vsir_program_get_src_params(program, new_src_count)))
{
ERR("Failed to allocate %u source parameters.\n", new_src_count);
goto fail;
}
}
/* Then do the copy. */
for (j = 0, l = 0; j < ins->src_count; j += 2)
{
unsigned int source_label = label_from_src_param(&ins->src[j + 1]);
size_t k, match_count = 0;
for (k = 0; k < map_count; ++k)
{
struct lower_switch_to_if_ladder_block_mapping *mapping = &block_map[k];
if (mapping->switch_label == source_label && mapping->target_label == current_label)
{
match_count += 1;
new_src[l] = ins->src[j];
new_src[l + 1] = ins->src[j + 1];
new_src[l + 1].reg.idx[0].offset = mapping->if_label;
l += 2;
}
}
if (match_count == 0)
{
new_src[l] = ins->src[j];
new_src[l + 1] = ins->src[j + 1];
l += 2;
}
}
assert(l == new_src_count);
ins->src_count = new_src_count;
ins->src = new_src;
}
vkd3d_free(program->instructions.elements);
vkd3d_free(block_map);
program->instructions.elements = instructions;
program->instructions.capacity = ins_capacity;
program->instructions.count = ins_count;
program->block_count = block_count;
program->ssa_count = ssa_count;
return VKD3D_OK;
fail:
vkd3d_free(instructions);
vkd3d_free(block_map);
return VKD3D_ERROR_OUT_OF_MEMORY;
}
static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src);
/* This is idempotent: it can be safely applied more than once on the
* same register. */
static void materialize_ssas_to_temps_process_reg(struct vkd3d_shader_parser *parser, struct vkd3d_shader_register *reg)
{
unsigned int i;
if (reg->type == VKD3DSPR_SSA)
{
reg->type = VKD3DSPR_TEMP;
reg->idx[0].offset += parser->program.temp_count;
}
for (i = 0; i < reg->idx_count; ++i)
if (reg->idx[i].rel_addr)
materialize_ssas_to_temps_process_src_param(parser, reg->idx[i].rel_addr);
}
static void materialize_ssas_to_temps_process_dst_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_dst_param *dst)
{
materialize_ssas_to_temps_process_reg(parser, &dst->reg);
}
static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src)
{
materialize_ssas_to_temps_process_reg(parser, &src->reg);
}
static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins,
unsigned int label)
{
unsigned int i;
assert(ins->handler_idx == VKD3DSIH_PHI);
for (i = 0; i < ins->src_count; i += 2)
{
if (label_from_src_param(&ins->src[i + 1]) == label)
return &ins->src[i];
}
vkd3d_unreachable();
}
static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser *parser,
struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc,
const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond,
const struct vkd3d_shader_src_param *source, bool invert)
{
struct vkd3d_shader_src_param *src;
struct vkd3d_shader_dst_param *dst;
if (!vsir_instruction_init_with_params(&parser->program, instruction, loc,
cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1))
return false;
dst = instruction->dst;
src = instruction->src;
dst[0] = *dest;
materialize_ssas_to_temps_process_dst_param(parser, &dst[0]);
assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0);
assert(dst[0].modifiers == 0);
assert(dst[0].shift == 0);
if (cond)
{
src[0] = *cond;
src[1 + invert] = *source;
memset(&src[2 - invert], 0, sizeof(src[2 - invert]));
src[2 - invert].reg = dst[0].reg;
materialize_ssas_to_temps_process_src_param(parser, &src[1]);
materialize_ssas_to_temps_process_src_param(parser, &src[2]);
}
else
{
src[0] = *source;
materialize_ssas_to_temps_process_src_param(parser, &src[0]);
}
return true;
}
static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *parser)
{
struct vkd3d_shader_instruction *instructions = NULL;
struct materialize_ssas_to_temps_block_data
{
size_t phi_begin;
size_t phi_count;
} *block_index = NULL;
size_t ins_capacity = 0, ins_count = 0, i;
unsigned int current_label = 0;
if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count))
goto fail;
if (!(block_index = vkd3d_calloc(parser->program.block_count, sizeof(*block_index))))
{
ERR("Failed to allocate block index.\n");
goto fail;
}
for (i = 0; i < parser->program.instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i];
switch (ins->handler_idx)
{
case VKD3DSIH_LABEL:
current_label = label_from_src_param(&ins->src[0]);
break;
case VKD3DSIH_PHI:
assert(current_label != 0);
assert(i != 0);
if (block_index[current_label - 1].phi_begin == 0)
block_index[current_label - 1].phi_begin = i;
block_index[current_label - 1].phi_count += 1;
break;
default:
current_label = 0;
break;
}
}
for (i = 0; i < parser->program.instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i];
size_t j;
for (j = 0; j < ins->dst_count; ++j)
materialize_ssas_to_temps_process_dst_param(parser, &ins->dst[j]);
for (j = 0; j < ins->src_count; ++j)
materialize_ssas_to_temps_process_src_param(parser, &ins->src[j]);
switch (ins->handler_idx)
{
case VKD3DSIH_LABEL:
current_label = label_from_src_param(&ins->src[0]);
break;
case VKD3DSIH_BRANCH:
{
if (vsir_register_is_label(&ins->src[0].reg))
{
const struct materialize_ssas_to_temps_block_data *data = &block_index[label_from_src_param(&ins->src[0]) - 1];
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + data->phi_count))
goto fail;
for (j = data->phi_begin; j < data->phi_begin + data->phi_count; ++j)
{
const struct vkd3d_shader_src_param *source;
source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label);
if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location,
&parser->program.instructions.elements[j].dst[0], NULL, source, false))
goto fail;
++ins_count;
}
}
else
{
struct materialize_ssas_to_temps_block_data *data_true = &block_index[label_from_src_param(&ins->src[1]) - 1],
*data_false = &block_index[label_from_src_param(&ins->src[2]) - 1];
const struct vkd3d_shader_src_param *cond = &ins->src[0];
if (!reserve_instructions(&instructions, &ins_capacity,
ins_count + data_true->phi_count + data_false->phi_count))
goto fail;
for (j = data_true->phi_begin; j < data_true->phi_begin + data_true->phi_count; ++j)
{
const struct vkd3d_shader_src_param *source;
source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label);
if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location,
&parser->program.instructions.elements[j].dst[0], cond, source, false))
goto fail;
++ins_count;
}
for (j = data_false->phi_begin; j < data_false->phi_begin + data_false->phi_count; ++j)
{
const struct vkd3d_shader_src_param *source;
source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label);
if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location,
&parser->program.instructions.elements[j].dst[0], cond, source, true))
goto fail;
++ins_count;
}
}
break;
}
case VKD3DSIH_PHI:
continue;
default:
break;
}
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
goto fail;
instructions[ins_count++] = *ins;
}
vkd3d_free(parser->program.instructions.elements);
vkd3d_free(block_index);
parser->program.instructions.elements = instructions;
parser->program.instructions.capacity = ins_capacity;
parser->program.instructions.count = ins_count;
parser->program.temp_count += parser->program.ssa_count;
parser->program.ssa_count = 0;
return VKD3D_OK;
fail:
vkd3d_free(instructions);
vkd3d_free(block_index);
return VKD3D_ERROR_OUT_OF_MEMORY;
}
static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser)
{
const unsigned int block_temp_idx = parser->program.temp_count;
struct vkd3d_shader_instruction *instructions = NULL;
const struct vkd3d_shader_location no_loc = {0};
size_t ins_capacity = 0, ins_count = 0, i;
bool first_label_found = false;
if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count))
goto fail;
for (i = 0; i < parser->program.instructions.count; ++i)
{
struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i];
switch (ins->handler_idx)
{
case VKD3DSIH_PHI:
case VKD3DSIH_SWITCH_MONOLITHIC:
vkd3d_unreachable();
case VKD3DSIH_LABEL:
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4))
goto fail;
if (!first_label_found)
{
first_label_found = true;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1))
goto fail;
dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx);
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0]));
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0))
goto fail;
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1))
goto fail;
src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx);
ins_count++;
}
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1))
goto fail;
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0]));
ins_count++;
break;
case VKD3DSIH_BRANCH:
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2))
goto fail;
if (vsir_register_is_label(&ins->src[0].reg))
{
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1))
goto fail;
dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx);
src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0]));
ins_count++;
}
else
{
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3))
goto fail;
dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx);
instructions[ins_count].src[0] = ins->src[0];
src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1]));
src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2]));
ins_count++;
}
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0))
goto fail;
ins_count++;
break;
case VKD3DSIH_RET:
default:
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
goto fail;
instructions[ins_count++] = *ins;
break;
}
}
assert(first_label_found);
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3))
goto fail;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0))
goto fail;
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0))
goto fail;
ins_count++;
if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0))
goto fail;
ins_count++;
vkd3d_free(parser->program.instructions.elements);
parser->program.instructions.elements = instructions;
parser->program.instructions.capacity = ins_capacity;
parser->program.instructions.count = ins_count;
parser->program.temp_count += 1;
return VKD3D_OK;
fail:
vkd3d_free(instructions);
return VKD3D_ERROR_OUT_OF_MEMORY;
}
enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser,
const struct vkd3d_shader_compile_info *compile_info)
{
struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions;
enum vkd3d_result result = VKD3D_OK;
remove_dcl_temps(&parser->program);
if ((result = instruction_array_lower_texkills(parser)) < 0)
return result;
if (parser->shader_desc.is_dxil)
{
if ((result = lower_switch_to_if_ladder(&parser->program)) < 0)
return result;
if ((result = materialize_ssas_to_temps(parser)) < 0)
return result;
if ((result = simple_structurizer_run(parser)) < 0)
return result;
}
else
{
if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
{
if ((result = remap_output_signature(parser, compile_info)) < 0)
return result;
}
if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL)
{
if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0)
return result;
if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions,
&parser->shader_desc.input_signature)) < 0)
return result;
}
if ((result = shader_normalise_io_registers(parser)) < 0)
return result;
if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0)
return result;
remove_dead_code(&parser->program);
if ((result = normalise_combined_samplers(parser)) < 0)
return result;
}
if ((result = flatten_control_flow_constructs(parser)) < 0)
return result;
if (TRACE_ON())
vkd3d_shader_trace(&parser->program);
if (!parser->failed && (result = vsir_validate(parser)) < 0)
return result;
if (parser->failed)
result = VKD3D_ERROR_INVALID_SHADER;
return result;
}
struct validation_context
{
struct vkd3d_shader_parser *parser;
const struct vsir_program *program;
size_t instruction_idx;
bool invalid_instruction_idx;
bool dcl_temps_found;
enum vkd3d_shader_opcode phase;
enum cf_type
{
CF_TYPE_UNKNOWN = 0,
CF_TYPE_STRUCTURED,
CF_TYPE_BLOCKS,
} cf_type;
bool inside_block;
struct validation_context_temp_data
{
enum vsir_dimension dimension;
size_t first_seen;
} *temps;
struct validation_context_ssa_data
{
enum vsir_dimension dimension;
enum vkd3d_data_type data_type;
size_t first_seen;
uint32_t write_mask;
uint32_t read_mask;
size_t first_assigned;
} *ssas;
enum vkd3d_shader_opcode *blocks;
size_t depth;
size_t blocks_capacity;
};
static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx,
enum vkd3d_shader_error error, const char *format, ...)
{
struct vkd3d_string_buffer buf;
va_list args;
vkd3d_string_buffer_init(&buf);
va_start(args, format);
vkd3d_string_buffer_vprintf(&buf, format, args);
va_end(args);
if (ctx->invalid_instruction_idx)
{
vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer);
ERR("VSIR validation error: %s\n", buf.buffer);
}
else
{
vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer);
ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer);
}
vkd3d_string_buffer_cleanup(&buf);
}
static void vsir_validate_src_param(struct validation_context *ctx,
const struct vkd3d_shader_src_param *src);
static void vsir_validate_register(struct validation_context *ctx,
const struct vkd3d_shader_register *reg)
{
unsigned int i;
if (reg->type >= VKD3DSPR_COUNT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.",
reg->type);
if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.",
reg->precision);
if (reg->data_type >= VKD3D_DATA_COUNT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.",
reg->data_type);
if (reg->dimension >= VSIR_DIMENSION_COUNT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.",
reg->dimension);
if (reg->idx_count > ARRAY_SIZE(reg->idx))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.",
reg->idx_count);
for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i)
{
const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr;
if (reg->idx[i].rel_addr)
vsir_validate_src_param(ctx, param);
}
switch (reg->type)
{
case VKD3DSPR_TEMP:
{
struct validation_context_temp_data *data;
if (reg->idx_count != 1)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a TEMP register.",
reg->idx_count);
break;
}
if (reg->idx[0].rel_addr)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register.");
if (reg->idx[0].offset >= ctx->parser->program.temp_count)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.",
reg->idx[0].offset, ctx->parser->program.temp_count);
break;
}
data = &ctx->temps[reg->idx[0].offset];
if (reg->dimension == VSIR_DIMENSION_NONE)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a TEMP register.");
break;
}
/* TEMP registers can be scalar or vec4, provided that
* each individual register always appears with the same
* dimension. */
if (data->dimension == VSIR_DIMENSION_NONE)
{
data->dimension = reg->dimension;
data->first_seen = ctx->instruction_idx;
}
else if (data->dimension != reg->dimension)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a TEMP register: "
"it has already been seen with dimension %#x at instruction %zu.",
reg->dimension, data->dimension, data->first_seen);
}
break;
}
case VKD3DSPR_SSA:
{
struct validation_context_ssa_data *data;
if (reg->idx_count != 1)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a SSA register.",
reg->idx_count);
break;
}
if (reg->idx[0].rel_addr)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a SSA register.");
if (reg->idx[0].offset >= ctx->program->ssa_count)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
"SSA register index %u exceeds the maximum count %u.",
reg->idx[0].offset, ctx->program->ssa_count);
break;
}
data = &ctx->ssas[reg->idx[0].offset];
if (reg->dimension == VSIR_DIMENSION_NONE)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register.");
break;
}
/* SSA registers can be scalar or vec4, provided that each
* individual register always appears with the same
* dimension. */
if (data->dimension == VSIR_DIMENSION_NONE)
{
data->dimension = reg->dimension;
data->data_type = reg->data_type;
data->first_seen = ctx->instruction_idx;
}
else
{
if (data->dimension != reg->dimension)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a SSA register: "
"it has already been seen with dimension %#x at instruction %zu.",
reg->dimension, data->dimension, data->first_seen);
if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a SSA register: "
"it has already been seen with data type %#x at instruction %zu.",
reg->data_type, data->data_type, data->first_seen);
}
break;
}
case VKD3DSPR_LABEL:
if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.",
reg->precision);
if (reg->data_type != VKD3D_DATA_UINT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.",
reg->data_type);
if (reg->dimension != VSIR_DIMENSION_NONE)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.",
reg->dimension);
if (reg->idx_count != 1)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.",
reg->idx_count);
break;
}
if (reg->idx[0].rel_addr)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a LABEL register.");
/* Index == 0 is invalid, but it is temporarily allowed
* for intermediate stages. Once we support validation
* dialects we can selectively check for that. */
if (reg->idx[0].offset > ctx->program->block_count)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
"LABEL register index %u exceeds the maximum count %u.",
reg->idx[0].offset, ctx->program->block_count);
break;
case VKD3DSPR_NULL:
if (reg->idx_count != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.",
reg->idx_count);
break;
case VKD3DSPR_IMMCONST:
if (reg->idx_count != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST register.",
reg->idx_count);
break;
case VKD3DSPR_IMMCONST64:
if (reg->idx_count != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.",
reg->idx_count);
break;
default:
break;
}
}
static void vsir_validate_dst_param(struct validation_context *ctx,
const struct vkd3d_shader_dst_param *dst)
{
vsir_validate_register(ctx, &dst->reg);
if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.",
dst->write_mask);
switch (dst->reg.dimension)
{
case VSIR_DIMENSION_SCALAR:
if (dst->write_mask != VKD3DSP_WRITEMASK_0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.",
dst->write_mask);
break;
case VSIR_DIMENSION_VEC4:
if (dst->write_mask == 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask.");
break;
default:
if (dst->write_mask != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.",
dst->reg.dimension, dst->write_mask);
break;
}
if (dst->modifiers & ~VKD3DSPDM_MASK)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.",
dst->modifiers);
switch (dst->shift)
{
case 0:
case 1:
case 2:
case 3:
case 13:
case 14:
case 15:
break;
default:
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.",
dst->shift);
}
switch (dst->reg.type)
{
case VKD3DSPR_SSA:
if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count)
{
struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset];
if (data->write_mask == 0)
{
data->write_mask = dst->write_mask;
data->first_assigned = ctx->instruction_idx;
}
else
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE,
"SSA register is already assigned at instruction %zu.",
data->first_assigned);
}
}
break;
case VKD3DSPR_IMMCONST:
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid IMMCONST register used as destination parameter.");
break;
case VKD3DSPR_IMMCONST64:
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid IMMCONST64 register used as destination parameter.");
break;
default:
break;
}
}
static void vsir_validate_src_param(struct validation_context *ctx,
const struct vkd3d_shader_src_param *src)
{
vsir_validate_register(ctx, &src->reg);
if (src->swizzle & ~0x03030303u)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.",
src->swizzle);
if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.",
src->reg.dimension, src->swizzle);
if (src->modifiers >= VKD3DSPSM_COUNT)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.",
src->modifiers);
switch (src->reg.type)
{
case VKD3DSPR_SSA:
if (src->reg.idx[0].offset < ctx->parser->program.ssa_count)
{
struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset];
unsigned int i;
for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i));
}
break;
default:
break;
}
}
static void vsir_validate_dst_count(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction, unsigned int count)
{
if (instruction->dst_count != count)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT,
"Invalid destination count %u for an instruction of type %#x, expected %u.",
instruction->dst_count, instruction->handler_idx, count);
}
static void vsir_validate_src_count(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction, unsigned int count)
{
if (instruction->src_count != count)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
"Invalid source count %u for an instruction of type %#x, expected %u.",
instruction->src_count, instruction->handler_idx, count);
}
static bool vsir_validate_src_min_count(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction, unsigned int count)
{
if (instruction->src_count < count)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
"Invalid source count %u for an instruction of type %#x, expected at least %u.",
instruction->src_count, instruction->handler_idx, count);
return false;
}
return true;
}
static bool vsir_validate_src_max_count(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction, unsigned int count)
{
if (instruction->src_count > count)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
"Invalid source count %u for an instruction of type %#x, expected at most %u.",
instruction->src_count, instruction->handler_idx, count);
return false;
}
return true;
}
static const char *name_from_cf_type(enum cf_type type)
{
switch (type)
{
case CF_TYPE_STRUCTURED:
return "structured";
case CF_TYPE_BLOCKS:
return "block-based";
default:
vkd3d_unreachable();
}
}
static void vsir_validate_cf_type(struct validation_context *ctx,
const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type)
{
assert(ctx->cf_type != CF_TYPE_UNKNOWN);
assert(expected_type != CF_TYPE_UNKNOWN);
if (ctx->cf_type != expected_type)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.",
instruction->handler_idx, name_from_cf_type(ctx->cf_type));
}
static void vsir_validate_instruction(struct validation_context *ctx)
{
const struct vkd3d_shader_version *version = &ctx->program->shader_version;
const struct vkd3d_shader_instruction *instruction;
size_t i;
instruction = &ctx->program->instructions.elements[ctx->instruction_idx];
ctx->parser->location = instruction->location;
for (i = 0; i < instruction->dst_count; ++i)
vsir_validate_dst_param(ctx, &instruction->dst[i]);
for (i = 0; i < instruction->src_count; ++i)
vsir_validate_src_param(ctx, &instruction->src[i]);
if (instruction->handler_idx >= VKD3DSIH_INVALID)
{
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.",
instruction->handler_idx);
}
switch (instruction->handler_idx)
{
case VKD3DSIH_HS_DECLS:
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
case VKD3DSIH_HS_FORK_PHASE:
case VKD3DSIH_HS_JOIN_PHASE:
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (version->type != VKD3D_SHADER_TYPE_HULL)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.",
instruction->handler_idx);
if (ctx->depth != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.",
instruction->handler_idx);
ctx->phase = instruction->handler_idx;
ctx->dcl_temps_found = false;
return;
default:
break;
}
if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
"Instruction %#x appear before any phase instruction in a hull shader.",
instruction->handler_idx);
/* We support two different control flow types in shaders:
* block-based, like DXIL and SPIR-V, and structured, like D3DBC
* and TPF. The shader is detected as block-based when its first
* instruction, except for DCL_* and phases, is a LABEL. Currently
* we mandate that each shader is either purely block-based or
* purely structured. In principle we could allow structured
* constructs in a block, provided they are confined in a single
* block, but need for that hasn't arisen yet, so we don't. */
if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction))
{
if (instruction->handler_idx == VKD3DSIH_LABEL)
ctx->cf_type = CF_TYPE_BLOCKS;
else
ctx->cf_type = CF_TYPE_STRUCTURED;
}
if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction))
{
switch (instruction->handler_idx)
{
case VKD3DSIH_LABEL:
if (ctx->inside_block)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block.");
ctx->inside_block = true;
break;
case VKD3DSIH_RET:
case VKD3DSIH_BRANCH:
case VKD3DSIH_SWITCH_MONOLITHIC:
if (!ctx->inside_block)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.",
instruction->handler_idx);
ctx->inside_block = false;
break;
default:
if (!ctx->inside_block)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.",
instruction->handler_idx);
break;
}
}
switch (instruction->handler_idx)
{
case VKD3DSIH_DCL_TEMPS:
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (ctx->dcl_temps_found)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction.");
if (instruction->declaration.count > ctx->program->temp_count)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS,
"Invalid DCL_TEMPS count %u, expected at most %u.",
instruction->declaration.count, ctx->program->temp_count);
ctx->dcl_temps_found = true;
break;
case VKD3DSIH_IF:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 1);
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
return;
ctx->blocks[ctx->depth++] = instruction->handler_idx;
break;
case VKD3DSIH_IFC:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 2);
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
return;
ctx->blocks[ctx->depth++] = VKD3DSIH_IF;
break;
case VKD3DSIH_ELSE:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block.");
else
ctx->blocks[ctx->depth - 1] = instruction->handler_idx;
break;
case VKD3DSIH_ENDIF:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block.");
else
--ctx->depth;
break;
case VKD3DSIH_LOOP:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0);
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
return;
ctx->blocks[ctx->depth++] = instruction->handler_idx;
break;
case VKD3DSIH_ENDLOOP:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block.");
else
--ctx->depth;
break;
case VKD3DSIH_REP:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 1);
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
return;
ctx->blocks[ctx->depth++] = instruction->handler_idx;
break;
case VKD3DSIH_ENDREP:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block.");
else
--ctx->depth;
break;
case VKD3DSIH_SWITCH:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 1);
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
return;
ctx->blocks[ctx->depth++] = instruction->handler_idx;
break;
case VKD3DSIH_ENDSWITCH:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block.");
else
--ctx->depth;
break;
case VKD3DSIH_RET:
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 0);
break;
case VKD3DSIH_LABEL:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 0);
vsir_validate_src_count(ctx, instruction, 1);
if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid register of type %#x in a LABEL instruction, expected LABEL.",
instruction->src[0].reg.type);
break;
case VKD3DSIH_BRANCH:
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 0);
if (!vsir_validate_src_min_count(ctx, instruction, 1))
break;
if (vsir_register_is_label(&instruction->src[0].reg))
{
/* Unconditional branch: parameters are jump label,
* optional merge label, optional continue label. */
vsir_validate_src_max_count(ctx, instruction, 3);
for (i = 0; i < instruction->src_count; ++i)
{
if (!vsir_register_is_label(&instruction->src[i].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.",
instruction->src[i].reg.type);
}
}
else
{
/* Conditional branch: parameters are condition, true
* jump label, false jump label, optional merge label,
* optional continue label. */
vsir_validate_src_min_count(ctx, instruction, 3);
vsir_validate_src_max_count(ctx, instruction, 5);
for (i = 1; i < instruction->src_count; ++i)
{
if (!vsir_register_is_label(&instruction->src[i].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.",
instruction->src[i].reg.type);
}
}
break;
case VKD3DSIH_SWITCH_MONOLITHIC:
{
unsigned int case_count;
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 0);
/* Parameters are source, default label, merge label and
* then pairs of constant value and case label. */
if (!vsir_validate_src_min_count(ctx, instruction, 3))
break;
if (instruction->src_count % 2 != 1)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
"Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.",
instruction->src_count);
if (!vsir_register_is_label(&instruction->src[1].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
instruction->src[1].reg.type);
if (!vsir_register_is_label(&instruction->src[2].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
instruction->src[2].reg.type);
case_count = (instruction->src_count - 3) / 2;
for (i = 0; i < case_count; ++i)
{
unsigned int value_idx = 3 + 2 * i;
unsigned int label_idx = 3 + 2 * i + 1;
if (!register_is_constant(&instruction->src[value_idx].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, "
"expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
if (!vsir_register_is_label(&instruction->src[label_idx].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, "
"expected LABEL.", i, instruction->src[value_idx].reg.type);
}
break;
}
case VKD3DSIH_PHI:
{
unsigned int incoming_count;
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
vsir_validate_dst_count(ctx, instruction, 1);
vsir_validate_src_min_count(ctx, instruction, 2);
if (instruction->src_count % 2 != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
"Invalid source count %u for a PHI instruction, it must be an even number.",
instruction->src_count);
incoming_count = instruction->src_count / 2;
if (!register_is_ssa(&instruction->dst[0].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid destination of type %#x in PHI instruction, expected SSA.",
instruction->dst[0].reg.type);
if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
"Invalid destination dimension %#x in PHI instruction, expected scalar.",
instruction->dst[0].reg.dimension);
if (instruction->dst[0].modifiers != VKD3DSPDM_NONE)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS,
"Invalid modifiers %#x for the destination of a PHI instruction, expected none.",
instruction->dst[0].modifiers);
if (instruction->dst[0].shift != 0)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT,
"Invalid shift %#x for the destination of a PHI instruction, expected none.",
instruction->dst[0].shift);
for (i = 0; i < incoming_count; ++i)
{
unsigned int value_idx = 2 * i;
unsigned int label_idx = 2 * i + 1;
if (!register_is_constant(&instruction->src[value_idx].reg) && !register_is_ssa(&instruction->src[value_idx].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid value register for incoming %zu of type %#x in PHI instruction, "
"expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR)
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
"Invalid value dimension %#x for incoming %zu in PHI instruction, expected scalar.",
instruction->src[value_idx].reg.dimension, i);
if (!vsir_register_is_label(&instruction->src[label_idx].reg))
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
"Invalid label register for case %zu of type %#x in PHI instruction, "
"expected LABEL.", i, instruction->src[value_idx].reg.type);
}
break;
}
default:
break;
}
}
enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser)
{
struct validation_context ctx =
{
.parser = parser,
.program = &parser->program,
.phase = VKD3DSIH_INVALID,
};
unsigned int i;
if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION))
return VKD3D_OK;
if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps))))
goto fail;
if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas))))
goto fail;
for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->program.instructions.count; ++ctx.instruction_idx)
vsir_validate_instruction(&ctx);
ctx.invalid_instruction_idx = true;
if (ctx.depth != 0)
validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "%zu nested blocks were not closed.", ctx.depth);
if (ctx.inside_block)
validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Last block was not closed.");
for (i = 0; i < ctx.program->ssa_count; ++i)
{
struct validation_context_ssa_data *data = &ctx.ssas[i];
if ((data->write_mask | data->read_mask) != data->write_mask)
validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE,
"SSA register %u has invalid read mask %#x, which is not a subset of the write mask %#x "
"at the point of definition.", i, data->read_mask, data->write_mask);
}
vkd3d_free(ctx.blocks);
vkd3d_free(ctx.temps);
vkd3d_free(ctx.ssas);
return VKD3D_OK;
fail:
vkd3d_free(ctx.blocks);
vkd3d_free(ctx.temps);
vkd3d_free(ctx.ssas);
return VKD3D_ERROR_OUT_OF_MEMORY;
}