mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2024-11-21 16:46:41 -08:00
6028 lines
218 KiB
C
6028 lines
218 KiB
C
/*
 * Copyright 2023 Conor McCarthy for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */
|
|
|
|
#include "vkd3d_shader_private.h"
|
|
#include "vkd3d_types.h"
|
|
|
|
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve)
|
|
{
|
|
program->shader_version = *version;
|
|
return shader_instruction_array_init(&program->instructions, reserve);
|
|
}
|
|
|
|
void vsir_program_cleanup(struct vsir_program *program)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < program->block_name_count; ++i)
|
|
vkd3d_free((void *)program->block_names[i]);
|
|
vkd3d_free(program->block_names);
|
|
shader_instruction_array_destroy(&program->instructions);
|
|
shader_signature_cleanup(&program->input_signature);
|
|
shader_signature_cleanup(&program->output_signature);
|
|
shader_signature_cleanup(&program->patch_constant_signature);
|
|
}
|
|
|
|
static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg)
|
|
{
|
|
return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID;
|
|
}
|
|
|
|
static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction)
|
|
{
|
|
enum vkd3d_shader_opcode handler_idx = instruction->handler_idx;
|
|
return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT)
|
|
|| handler_idx == VKD3DSIH_HS_DECLS;
|
|
}
|
|
|
|
static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins)
|
|
{
|
|
struct vkd3d_shader_location location = ins->location;
|
|
|
|
vsir_instruction_init(ins, &location, VKD3DSIH_NOP);
|
|
}
|
|
|
|
static bool vsir_instruction_init_with_params(struct vsir_program *program,
|
|
struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
|
|
enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count)
|
|
{
|
|
vsir_instruction_init(ins, location, handler_idx);
|
|
ins->dst_count = dst_count;
|
|
ins->src_count = src_count;
|
|
|
|
if (!(ins->dst = vsir_program_get_dst_params(program, ins->dst_count)))
|
|
{
|
|
ERR("Failed to allocate %u destination parameters.\n", dst_count);
|
|
return false;
|
|
}
|
|
|
|
if (!(ins->src = vsir_program_get_src_params(program, ins->src_count)))
|
|
{
|
|
ERR("Failed to allocate %u source parameters.\n", src_count);
|
|
return false;
|
|
}
|
|
|
|
memset(ins->dst, 0, sizeof(*ins->dst) * ins->dst_count);
|
|
memset(ins->src, 0, sizeof(*ins->src) * ins->src_count);
|
|
return true;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program,
|
|
struct vkd3d_shader_instruction *texkill, unsigned int *tmp_idx)
|
|
{
|
|
const unsigned int components_read = 3 + (program->shader_version.major >= 2);
|
|
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
|
|
size_t pos = texkill - instructions->elements;
|
|
struct vkd3d_shader_instruction *ins;
|
|
unsigned int j;
|
|
|
|
if (!shader_instruction_array_insert_at(instructions, pos + 1, components_read + 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
if (*tmp_idx == ~0u)
|
|
*tmp_idx = program->temp_count++;
|
|
|
|
/* tmp = ins->dst[0] < 0 */
|
|
|
|
ins = &instructions->elements[pos + 1];
|
|
if (!vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_LTO, 1, 2))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
|
|
ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
|
|
ins->dst[0].reg.idx[0].offset = *tmp_idx;
|
|
ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
|
|
|
|
ins->src[0].reg = texkill->dst[0].reg;
|
|
ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE;
|
|
vsir_register_init(&ins->src[1].reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
|
|
ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
|
|
ins->src[1].reg.u.immconst_f32[0] = 0.0f;
|
|
ins->src[1].reg.u.immconst_f32[1] = 0.0f;
|
|
ins->src[1].reg.u.immconst_f32[2] = 0.0f;
|
|
ins->src[1].reg.u.immconst_f32[3] = 0.0f;
|
|
|
|
/* tmp.x = tmp.x || tmp.y */
|
|
/* tmp.x = tmp.x || tmp.z */
|
|
/* tmp.x = tmp.x || tmp.w, if sm >= 2.0 */
|
|
|
|
for (j = 1; j < components_read; ++j)
|
|
{
|
|
ins = &instructions->elements[pos + 1 + j];
|
|
if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_OR, 1, 2)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
vsir_register_init(&ins->dst[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
|
|
ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
|
|
ins->dst[0].reg.idx[0].offset = *tmp_idx;
|
|
ins->dst[0].write_mask = VKD3DSP_WRITEMASK_0;
|
|
|
|
vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
|
|
ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
|
|
ins->src[0].reg.idx[0].offset = *tmp_idx;
|
|
ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
|
|
vsir_register_init(&ins->src[1].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
|
|
ins->src[1].reg.dimension = VSIR_DIMENSION_VEC4;
|
|
ins->src[1].reg.idx[0].offset = *tmp_idx;
|
|
ins->src[1].swizzle = vkd3d_shader_create_swizzle(j, j, j, j);
|
|
}
|
|
|
|
/* discard_nz tmp.x */
|
|
|
|
ins = &instructions->elements[pos + 1 + components_read];
|
|
if (!(vsir_instruction_init_with_params(program, ins, &texkill->location, VKD3DSIH_DISCARD, 0, 1)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
|
|
|
|
vsir_register_init(&ins->src[0].reg, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
|
|
ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4;
|
|
ins->src[0].reg.idx[0].offset = *tmp_idx;
|
|
ins->src[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
|
|
|
|
/* Make the original instruction no-op */
|
|
vkd3d_shader_instruction_make_nop(texkill);
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
/* The Shader Model 5 Assembly documentation states: "If components of a mad
|
|
* instruction are tagged as precise, the hardware must execute a mad instruction
|
|
* or the exact equivalent, and it cannot split it into a multiply followed by an add."
|
|
* But DXIL.rst states the opposite: "Floating point multiply & add. This operation is
|
|
* not fused for "precise" operations."
|
|
* Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */
|
|
static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program,
|
|
struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx)
|
|
{
|
|
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
|
|
struct vkd3d_shader_instruction *mul_ins, *add_ins;
|
|
size_t pos = mad - instructions->elements;
|
|
struct vkd3d_shader_dst_param *mul_dst;
|
|
|
|
if (!(mad->flags & VKD3DSI_PRECISE_XYZW))
|
|
return VKD3D_OK;
|
|
|
|
if (!shader_instruction_array_insert_at(instructions, pos + 1, 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
if (*tmp_idx == ~0u)
|
|
*tmp_idx = program->temp_count++;
|
|
|
|
mul_ins = &instructions->elements[pos];
|
|
add_ins = &instructions->elements[pos + 1];
|
|
|
|
mul_ins->handler_idx = VKD3DSIH_MUL;
|
|
mul_ins->src_count = 2;
|
|
|
|
if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW;
|
|
|
|
mul_dst = mul_ins->dst;
|
|
*add_ins->dst = *mul_dst;
|
|
|
|
mul_dst->modifiers = 0;
|
|
vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1);
|
|
mul_dst->reg.dimension = add_ins->dst->reg.dimension;
|
|
mul_dst->reg.idx[0].offset = *tmp_idx;
|
|
|
|
add_ins->src[0].reg = mul_dst->reg;
|
|
add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask);
|
|
add_ins->src[0].modifiers = 0;
|
|
add_ins->src[1] = mul_ins->src[2];
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program)
|
|
{
|
|
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
|
|
unsigned int tmp_idx = ~0u, i;
|
|
enum vkd3d_result ret;
|
|
|
|
for (i = 0; i < instructions->count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &instructions->elements[i];
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_TEXKILL:
|
|
if ((ret = vsir_program_lower_texkill(program, ins, &tmp_idx)) < 0)
|
|
return ret;
|
|
break;
|
|
|
|
case VKD3DSIH_MAD:
|
|
if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0)
|
|
return ret;
|
|
break;
|
|
|
|
case VKD3DSIH_DCL_CONSTANT_BUFFER:
|
|
case VKD3DSIH_DCL_TEMPS:
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static void shader_register_eliminate_phase_addressing(struct vkd3d_shader_register *reg,
|
|
unsigned int instance_id)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < reg->idx_count; ++i)
|
|
{
|
|
if (reg->idx[i].rel_addr && shader_register_is_phase_instance_id(®->idx[i].rel_addr->reg))
|
|
{
|
|
reg->idx[i].rel_addr = NULL;
|
|
reg->idx[i].offset += instance_id;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void shader_instruction_eliminate_phase_instance_id(struct vkd3d_shader_instruction *ins,
|
|
unsigned int instance_id)
|
|
{
|
|
struct vkd3d_shader_register *reg;
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < ins->src_count; ++i)
|
|
{
|
|
reg = (struct vkd3d_shader_register *)&ins->src[i].reg;
|
|
if (shader_register_is_phase_instance_id(reg))
|
|
{
|
|
vsir_register_init(reg, VKD3DSPR_IMMCONST, reg->data_type, 0);
|
|
reg->u.immconst_u32[0] = instance_id;
|
|
continue;
|
|
}
|
|
shader_register_eliminate_phase_addressing(reg, instance_id);
|
|
}
|
|
|
|
for (i = 0; i < ins->dst_count; ++i)
|
|
shader_register_eliminate_phase_addressing((struct vkd3d_shader_register *)&ins->dst[i].reg, instance_id);
|
|
}
|
|
|
|
static const struct vkd3d_shader_varying_map *find_varying_map(
|
|
const struct vkd3d_shader_varying_map_info *varying_map, unsigned int signature_idx)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < varying_map->varying_count; ++i)
|
|
{
|
|
if (varying_map->varying_map[i].output_signature_index == signature_idx)
|
|
return &varying_map->varying_map[i];
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program,
|
|
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
const struct vkd3d_shader_location location = {.source_name = compile_info->source_name};
|
|
struct shader_signature *signature = &program->output_signature;
|
|
const struct vkd3d_shader_varying_map_info *varying_map;
|
|
unsigned int i;
|
|
|
|
if (!(varying_map = vkd3d_find_struct(compile_info->next, VARYING_MAP_INFO)))
|
|
return VKD3D_OK;
|
|
|
|
for (i = 0; i < signature->element_count; ++i)
|
|
{
|
|
const struct vkd3d_shader_varying_map *map = find_varying_map(varying_map, i);
|
|
struct signature_element *e = &signature->elements[i];
|
|
|
|
if (map)
|
|
{
|
|
unsigned int input_mask = map->input_mask;
|
|
|
|
e->target_location = map->input_register_index;
|
|
|
|
/* It is illegal in Vulkan if the next shader uses the same varying
|
|
* location with a different mask. */
|
|
if (input_mask && input_mask != e->mask)
|
|
{
|
|
vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
|
|
"Aborting due to not yet implemented feature: "
|
|
"Output mask %#x does not match input mask %#x.",
|
|
e->mask, input_mask);
|
|
return VKD3D_ERROR_NOT_IMPLEMENTED;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
e->target_location = SIGNATURE_TARGET_LOCATION_UNUSED;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < varying_map->varying_count; ++i)
|
|
{
|
|
if (varying_map->varying_map[i].output_signature_index >= signature->element_count)
|
|
{
|
|
vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
|
|
"Aborting due to not yet implemented feature: "
|
|
"The next stage consumes varyings not written by this stage.");
|
|
return VKD3D_ERROR_NOT_IMPLEMENTED;
|
|
}
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
struct hull_flattener
|
|
{
|
|
struct vkd3d_shader_instruction_array instructions;
|
|
|
|
unsigned int instance_count;
|
|
unsigned int phase_body_idx;
|
|
enum vkd3d_shader_opcode phase;
|
|
struct vkd3d_shader_location last_ret_location;
|
|
};
|
|
|
|
static bool flattener_is_in_fork_or_join_phase(const struct hull_flattener *flattener)
|
|
{
|
|
return flattener->phase == VKD3DSIH_HS_FORK_PHASE || flattener->phase == VKD3DSIH_HS_JOIN_PHASE;
|
|
}
|
|
|
|
/* Describes the body of one fork/join phase in the instruction array. */
struct shader_phase_location
{
    /* Index of the first instruction of the phase body. */
    unsigned int index;
    /* Number of instances the phase body must be expanded to. */
    unsigned int instance_count;
    /* Number of instructions in the phase body, not counting the final RET. */
    unsigned int instruction_count;
};
|
|
|
|
struct shader_phase_location_array
|
|
{
|
|
/* Unlikely worst case: one phase for each component of each output register. */
|
|
struct shader_phase_location locations[MAX_REG_OUTPUT * VKD3D_VEC4_SIZE];
|
|
unsigned int count;
|
|
};
|
|
|
|
static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normaliser,
|
|
unsigned int index, struct shader_phase_location_array *locations)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &normaliser->instructions.elements[index];
|
|
struct shader_phase_location *loc;
|
|
bool b;
|
|
|
|
if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE)
|
|
{
|
|
b = flattener_is_in_fork_or_join_phase(normaliser);
|
|
/* Reset the phase info. */
|
|
normaliser->phase_body_idx = ~0u;
|
|
normaliser->phase = ins->handler_idx;
|
|
normaliser->instance_count = 1;
|
|
/* Leave the first occurrence and delete the rest. */
|
|
if (b)
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
return;
|
|
}
|
|
else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT
|
|
|| ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT)
|
|
{
|
|
normaliser->instance_count = ins->declaration.count + !ins->declaration.count;
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
return;
|
|
}
|
|
else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id(
|
|
&ins->declaration.dst.reg))
|
|
{
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
return;
|
|
}
|
|
|
|
if (normaliser->phase == VKD3DSIH_INVALID || vsir_instruction_is_dcl(ins))
|
|
return;
|
|
|
|
if (normaliser->phase_body_idx == ~0u)
|
|
normaliser->phase_body_idx = index;
|
|
|
|
if (ins->handler_idx == VKD3DSIH_RET)
|
|
{
|
|
normaliser->last_ret_location = ins->location;
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
if (locations->count >= ARRAY_SIZE(locations->locations))
|
|
{
|
|
FIXME("Insufficient space for phase location.\n");
|
|
return;
|
|
}
|
|
loc = &locations->locations[locations->count++];
|
|
loc->index = normaliser->phase_body_idx;
|
|
loc->instance_count = normaliser->instance_count;
|
|
loc->instruction_count = index - normaliser->phase_body_idx;
|
|
}
|
|
}
|
|
|
|
static enum vkd3d_result flattener_flatten_phases(struct hull_flattener *normaliser,
|
|
struct shader_phase_location_array *locations)
|
|
{
|
|
struct shader_phase_location *loc;
|
|
unsigned int i, j, k, end, count;
|
|
|
|
for (i = 0, count = 0; i < locations->count; ++i)
|
|
count += (locations->locations[i].instance_count - 1) * locations->locations[i].instruction_count;
|
|
|
|
if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
end = normaliser->instructions.count;
|
|
normaliser->instructions.count += count;
|
|
|
|
for (i = locations->count; i > 0; --i)
|
|
{
|
|
loc = &locations->locations[i - 1];
|
|
j = loc->index + loc->instruction_count;
|
|
memmove(&normaliser->instructions.elements[j + count], &normaliser->instructions.elements[j],
|
|
(end - j) * sizeof(*normaliser->instructions.elements));
|
|
end = j;
|
|
count -= (loc->instance_count - 1) * loc->instruction_count;
|
|
loc->index += count;
|
|
}
|
|
|
|
for (i = 0, count = 0; i < locations->count; ++i)
|
|
{
|
|
loc = &locations->locations[i];
|
|
/* Make a copy of the non-dcl instructions for each instance. */
|
|
for (j = 1; j < loc->instance_count; ++j)
|
|
{
|
|
for (k = 0; k < loc->instruction_count; ++k)
|
|
{
|
|
if (!shader_instruction_array_clone_instruction(&normaliser->instructions,
|
|
loc->index + loc->instruction_count * j + k, loc->index + k))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
}
|
|
/* Replace each reference to the instance id with a constant instance id. */
|
|
for (j = 0; j < loc->instance_count; ++j)
|
|
{
|
|
for (k = 0; k < loc->instruction_count; ++k)
|
|
shader_instruction_eliminate_phase_instance_id(
|
|
&normaliser->instructions.elements[loc->index + loc->instruction_count * j + k], j);
|
|
}
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
void vsir_register_init(struct vkd3d_shader_register *reg, enum vkd3d_shader_register_type reg_type,
|
|
enum vkd3d_data_type data_type, unsigned int idx_count)
|
|
{
|
|
reg->type = reg_type;
|
|
reg->precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
|
|
reg->non_uniform = false;
|
|
reg->data_type = data_type;
|
|
reg->idx[0].offset = ~0u;
|
|
reg->idx[0].rel_addr = NULL;
|
|
reg->idx[0].is_in_bounds = false;
|
|
reg->idx[1].offset = ~0u;
|
|
reg->idx[1].rel_addr = NULL;
|
|
reg->idx[1].is_in_bounds = false;
|
|
reg->idx[2].offset = ~0u;
|
|
reg->idx[2].rel_addr = NULL;
|
|
reg->idx[2].is_in_bounds = false;
|
|
reg->idx_count = idx_count;
|
|
reg->dimension = VSIR_DIMENSION_SCALAR;
|
|
reg->alignment = 0;
|
|
}
|
|
|
|
void vsir_src_param_init(struct vkd3d_shader_src_param *param, enum vkd3d_shader_register_type reg_type,
|
|
enum vkd3d_data_type data_type, unsigned int idx_count)
|
|
{
|
|
vsir_register_init(¶m->reg, reg_type, data_type, idx_count);
|
|
param->swizzle = 0;
|
|
param->modifiers = VKD3DSPSM_NONE;
|
|
}
|
|
|
|
void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader_register_type reg_type,
|
|
enum vkd3d_data_type data_type, unsigned int idx_count)
|
|
{
|
|
vsir_register_init(¶m->reg, reg_type, data_type, idx_count);
|
|
param->write_mask = VKD3DSP_WRITEMASK_0;
|
|
param->modifiers = VKD3DSPDM_NONE;
|
|
param->shift = 0;
|
|
}
|
|
|
|
void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id)
|
|
{
|
|
vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1);
|
|
param->reg.dimension = VSIR_DIMENSION_NONE;
|
|
param->reg.idx[0].offset = label_id;
|
|
}
|
|
|
|
static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
|
|
{
|
|
vsir_src_param_init(src, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
|
|
src->reg.idx[0].offset = idx;
|
|
}
|
|
|
|
static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx)
|
|
{
|
|
vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
|
|
src->reg.idx[0].offset = idx;
|
|
}
|
|
|
|
static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
|
|
{
|
|
vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1);
|
|
dst->reg.idx[0].offset = idx;
|
|
}
|
|
|
|
static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx)
|
|
{
|
|
vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1);
|
|
dst->reg.idx[0].offset = idx;
|
|
}
|
|
|
|
static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx)
|
|
{
|
|
vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
|
|
dst->reg.idx[0].offset = idx;
|
|
dst->write_mask = VKD3DSP_WRITEMASK_0;
|
|
}
|
|
|
|
static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx)
|
|
{
|
|
vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1);
|
|
src->reg.idx[0].offset = idx;
|
|
}
|
|
|
|
static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32_t value)
|
|
{
|
|
vsir_src_param_init(src, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
|
|
src->reg.u.immconst_u32[0] = value;
|
|
}
|
|
|
|
void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location,
|
|
enum vkd3d_shader_opcode handler_idx)
|
|
{
|
|
memset(ins, 0, sizeof(*ins));
|
|
ins->location = *location;
|
|
ins->handler_idx = handler_idx;
|
|
}
|
|
|
|
static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins,
|
|
const struct vkd3d_shader_location *location, unsigned int label_id, struct vsir_program *program)
|
|
{
|
|
struct vkd3d_shader_src_param *src_param;
|
|
|
|
if (!(src_param = vsir_program_get_src_params(program, 1)))
|
|
return false;
|
|
|
|
vsir_src_param_init_label(src_param, label_id);
|
|
|
|
vsir_instruction_init(ins, location, VKD3DSIH_LABEL);
|
|
ins->src = src_param;
|
|
ins->src_count = 1;
|
|
|
|
return true;
|
|
}
|
|
|
|
static enum vkd3d_result instruction_array_flatten_hull_shader_phases(struct vkd3d_shader_instruction_array *src_instructions)
|
|
{
|
|
struct hull_flattener flattener = {*src_instructions};
|
|
struct vkd3d_shader_instruction_array *instructions;
|
|
struct shader_phase_location_array locations;
|
|
enum vkd3d_result result = VKD3D_OK;
|
|
unsigned int i;
|
|
|
|
instructions = &flattener.instructions;
|
|
|
|
flattener.phase = VKD3DSIH_INVALID;
|
|
for (i = 0, locations.count = 0; i < instructions->count; ++i)
|
|
flattener_eliminate_phase_related_dcls(&flattener, i, &locations);
|
|
|
|
if ((result = flattener_flatten_phases(&flattener, &locations)) < 0)
|
|
return result;
|
|
|
|
if (flattener.phase != VKD3DSIH_INVALID)
|
|
{
|
|
if (!shader_instruction_array_reserve(&flattener.instructions, flattener.instructions.count + 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
vsir_instruction_init(&instructions->elements[instructions->count++], &flattener.last_ret_location, VKD3DSIH_RET);
|
|
}
|
|
|
|
*src_instructions = flattener.instructions;
|
|
return result;
|
|
}
|
|
|
|
struct control_point_normaliser
|
|
{
|
|
struct vkd3d_shader_instruction_array instructions;
|
|
enum vkd3d_shader_opcode phase;
|
|
struct vkd3d_shader_src_param *outpointid_param;
|
|
};
|
|
|
|
static bool control_point_normaliser_is_in_control_point_phase(const struct control_point_normaliser *normaliser)
|
|
{
|
|
return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
|
|
}
|
|
|
|
static struct vkd3d_shader_src_param *instruction_array_create_outpointid_param(
|
|
struct vkd3d_shader_instruction_array *instructions)
|
|
{
|
|
struct vkd3d_shader_src_param *rel_addr;
|
|
|
|
if (!(rel_addr = shader_src_param_allocator_get(&instructions->src_params, 1)))
|
|
return NULL;
|
|
|
|
vsir_register_init(&rel_addr->reg, VKD3DSPR_OUTPOINTID, VKD3D_DATA_UINT, 0);
|
|
rel_addr->swizzle = 0;
|
|
rel_addr->modifiers = 0;
|
|
|
|
return rel_addr;
|
|
}
|
|
|
|
static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param *dst_param,
|
|
struct control_point_normaliser *normaliser)
|
|
{
|
|
struct vkd3d_shader_register *reg = &dst_param->reg;
|
|
|
|
if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT)
|
|
{
|
|
/* The TPF reader validates idx_count. */
|
|
assert(reg->idx_count == 1);
|
|
reg->idx[1] = reg->idx[0];
|
|
/* The control point id param is implicit here. Avoid later complications by inserting it. */
|
|
reg->idx[0].offset = 0;
|
|
reg->idx[0].rel_addr = normaliser->outpointid_param;
|
|
++reg->idx_count;
|
|
}
|
|
}
|
|
|
|
static void shader_dst_param_io_init(struct vkd3d_shader_dst_param *param, const struct signature_element *e,
|
|
enum vkd3d_shader_register_type reg_type, unsigned int idx_count)
|
|
{
|
|
param->write_mask = e->mask;
|
|
param->modifiers = 0;
|
|
param->shift = 0;
|
|
vsir_register_init(¶m->reg, reg_type, vkd3d_data_type_from_component_type(e->component_type), idx_count);
|
|
}
|
|
|
|
static enum vkd3d_result control_point_normaliser_emit_hs_input(struct control_point_normaliser *normaliser,
|
|
const struct shader_signature *s, unsigned int input_control_point_count, unsigned int dst,
|
|
const struct vkd3d_shader_location *location)
|
|
{
|
|
struct vkd3d_shader_instruction *ins;
|
|
struct vkd3d_shader_dst_param *param;
|
|
const struct signature_element *e;
|
|
unsigned int i, count;
|
|
|
|
for (i = 0, count = 1; i < s->element_count; ++i)
|
|
count += !!s->elements[i].used_mask;
|
|
|
|
if (!shader_instruction_array_reserve(&normaliser->instructions, normaliser->instructions.count + count))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
memmove(&normaliser->instructions.elements[dst + count], &normaliser->instructions.elements[dst],
|
|
(normaliser->instructions.count - dst) * sizeof(*normaliser->instructions.elements));
|
|
normaliser->instructions.count += count;
|
|
|
|
ins = &normaliser->instructions.elements[dst];
|
|
vsir_instruction_init(ins, location, VKD3DSIH_HS_CONTROL_POINT_PHASE);
|
|
ins->flags = 1;
|
|
++ins;
|
|
|
|
for (i = 0; i < s->element_count; ++i)
|
|
{
|
|
e = &s->elements[i];
|
|
if (!e->used_mask)
|
|
continue;
|
|
|
|
if (e->sysval_semantic != VKD3D_SHADER_SV_NONE)
|
|
{
|
|
vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT_SIV);
|
|
param = &ins->declaration.register_semantic.reg;
|
|
ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval(e->sysval_semantic);
|
|
}
|
|
else
|
|
{
|
|
vsir_instruction_init(ins, location, VKD3DSIH_DCL_INPUT);
|
|
param = &ins->declaration.dst;
|
|
}
|
|
|
|
shader_dst_param_io_init(param, e, VKD3DSPR_INPUT, 2);
|
|
param->reg.idx[0].offset = input_control_point_count;
|
|
param->reg.idx[1].offset = e->register_index;
|
|
param->write_mask = e->mask;
|
|
|
|
++ins;
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_io(
|
|
struct vkd3d_shader_instruction_array *src_instructions, const struct shader_signature *input_signature)
|
|
{
|
|
struct vkd3d_shader_instruction_array *instructions;
|
|
struct control_point_normaliser normaliser;
|
|
unsigned int input_control_point_count;
|
|
struct vkd3d_shader_location location;
|
|
struct vkd3d_shader_instruction *ins;
|
|
enum vkd3d_result ret;
|
|
unsigned int i, j;
|
|
|
|
if (!(normaliser.outpointid_param = instruction_array_create_outpointid_param(src_instructions)))
|
|
{
|
|
ERR("Failed to allocate src param.\n");
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
normaliser.instructions = *src_instructions;
|
|
instructions = &normaliser.instructions;
|
|
normaliser.phase = VKD3DSIH_INVALID;
|
|
|
|
for (i = 0; i < normaliser.instructions.count; ++i)
|
|
{
|
|
ins = &instructions->elements[i];
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
|
|
case VKD3DSIH_HS_FORK_PHASE:
|
|
case VKD3DSIH_HS_JOIN_PHASE:
|
|
normaliser.phase = ins->handler_idx;
|
|
break;
|
|
default:
|
|
if (vsir_instruction_is_dcl(ins))
|
|
break;
|
|
for (j = 0; j < ins->dst_count; ++j)
|
|
shader_dst_param_normalise_outpointid(&ins->dst[j], &normaliser);
|
|
break;
|
|
}
|
|
}
|
|
|
|
normaliser.phase = VKD3DSIH_INVALID;
|
|
input_control_point_count = 1;
|
|
|
|
for (i = 0; i < instructions->count; ++i)
|
|
{
|
|
ins = &instructions->elements[i];
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT:
|
|
input_control_point_count = ins->declaration.count;
|
|
break;
|
|
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
|
|
*src_instructions = normaliser.instructions;
|
|
return VKD3D_OK;
|
|
case VKD3DSIH_HS_FORK_PHASE:
|
|
case VKD3DSIH_HS_JOIN_PHASE:
|
|
/* ins may be relocated if the instruction array expands. */
|
|
location = ins->location;
|
|
ret = control_point_normaliser_emit_hs_input(&normaliser, input_signature,
|
|
input_control_point_count, i, &location);
|
|
*src_instructions = normaliser.instructions;
|
|
return ret;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
*src_instructions = normaliser.instructions;
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
struct io_normaliser
|
|
{
|
|
struct vkd3d_shader_instruction_array instructions;
|
|
enum vkd3d_shader_type shader_type;
|
|
uint8_t major;
|
|
struct shader_signature *input_signature;
|
|
struct shader_signature *output_signature;
|
|
struct shader_signature *patch_constant_signature;
|
|
|
|
unsigned int instance_count;
|
|
unsigned int phase_body_idx;
|
|
enum vkd3d_shader_opcode phase;
|
|
unsigned int output_control_point_count;
|
|
|
|
struct vkd3d_shader_src_param *outpointid_param;
|
|
|
|
struct vkd3d_shader_dst_param *input_dcl_params[MAX_REG_OUTPUT];
|
|
struct vkd3d_shader_dst_param *output_dcl_params[MAX_REG_OUTPUT];
|
|
struct vkd3d_shader_dst_param *pc_dcl_params[MAX_REG_OUTPUT];
|
|
uint8_t input_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE];
|
|
uint8_t output_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE];
|
|
uint8_t pc_range_map[MAX_REG_OUTPUT][VKD3D_VEC4_SIZE];
|
|
|
|
bool use_vocp;
|
|
};
|
|
|
|
static bool io_normaliser_is_in_fork_or_join_phase(const struct io_normaliser *normaliser)
|
|
{
|
|
return normaliser->phase == VKD3DSIH_HS_FORK_PHASE || normaliser->phase == VKD3DSIH_HS_JOIN_PHASE;
|
|
}
|
|
|
|
static bool io_normaliser_is_in_control_point_phase(const struct io_normaliser *normaliser)
|
|
{
|
|
return normaliser->phase == VKD3DSIH_HS_CONTROL_POINT_PHASE;
|
|
}
|
|
|
|
static unsigned int shader_signature_find_element_for_reg(const struct shader_signature *signature,
|
|
unsigned int reg_idx, unsigned int write_mask)
|
|
{
|
|
unsigned int i, base_write_mask;
|
|
|
|
for (i = 0; i < signature->element_count; ++i)
|
|
{
|
|
struct signature_element *e = &signature->elements[i];
|
|
if (e->register_index <= reg_idx && e->register_index + e->register_count > reg_idx
|
|
&& (e->mask & write_mask) == write_mask)
|
|
{
|
|
return i;
|
|
}
|
|
}
|
|
|
|
/* Validated in the TPF reader, but failure in signature_element_range_expand_mask()
|
|
* can land us here on an unmatched vector mask. */
|
|
FIXME("Failed to find signature element for register index %u, mask %#x; using scalar mask.\n",
|
|
reg_idx, write_mask);
|
|
base_write_mask = 1u << vsir_write_mask_get_component_idx(write_mask);
|
|
if (base_write_mask != write_mask)
|
|
return shader_signature_find_element_for_reg(signature, reg_idx, base_write_mask);
|
|
|
|
vkd3d_unreachable();
|
|
}
|
|
|
|
struct signature_element *vsir_signature_find_element_for_reg(const struct shader_signature *signature,
|
|
unsigned int reg_idx, unsigned int write_mask)
|
|
{
|
|
return &signature->elements[shader_signature_find_element_for_reg(signature, reg_idx, write_mask)];
|
|
}
|
|
|
|
/* Return the range map entry for "register_idx" at the first component
 * selected by "write_mask". */
static unsigned int range_map_get_register_count(uint8_t range_map[][VKD3D_VEC4_SIZE],
        unsigned int register_idx, uint32_t write_mask)
{
    const unsigned int component_idx = vsir_write_mask_get_component_idx(write_mask);

    return range_map[register_idx][component_idx];
}
|
|
|
|
static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], unsigned int register_idx,
|
|
unsigned int register_count, uint32_t write_mask, bool is_dcl_indexrange)
|
|
{
|
|
unsigned int i, j, r, c, component_idx, component_count;
|
|
|
|
assert(write_mask <= VKD3DSP_WRITEMASK_ALL);
|
|
component_idx = vsir_write_mask_get_component_idx(write_mask);
|
|
component_count = vsir_write_mask_component_count(write_mask);
|
|
|
|
assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count);
|
|
|
|
if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange)
|
|
{
|
|
/* Validated in the TPF reader. */
|
|
assert(range_map[register_idx][component_idx] != UINT8_MAX);
|
|
return;
|
|
}
|
|
if (range_map[register_idx][component_idx] == register_count)
|
|
{
|
|
/* Already done. This happens when fxc splits a register declaration by
|
|
* component(s). The dcl_indexrange instructions are split too. */
|
|
return;
|
|
}
|
|
range_map[register_idx][component_idx] = register_count;
|
|
|
|
for (i = 0; i < register_count; ++i)
|
|
{
|
|
r = register_idx + i;
|
|
for (j = !i; j < component_count; ++j)
|
|
{
|
|
c = component_idx + j;
|
|
/* A synthetic patch constant range which overlaps an existing range can start upstream of it
|
|
* for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap.
|
|
* The latter is validated in the TPF reader. */
|
|
assert(!range_map[r][c] || !is_dcl_indexrange);
|
|
range_map[r][c] = UINT8_MAX;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void io_normaliser_add_index_range(struct io_normaliser *normaliser,
|
|
const struct vkd3d_shader_instruction *ins)
|
|
{
|
|
const struct vkd3d_shader_index_range *range = &ins->declaration.index_range;
|
|
const struct vkd3d_shader_register *reg = &range->dst.reg;
|
|
unsigned int reg_idx, write_mask, element_idx;
|
|
const struct shader_signature *signature;
|
|
uint8_t (*range_map)[VKD3D_VEC4_SIZE];
|
|
|
|
switch (reg->type)
|
|
{
|
|
case VKD3DSPR_INPUT:
|
|
case VKD3DSPR_INCONTROLPOINT:
|
|
range_map = normaliser->input_range_map;
|
|
signature = normaliser->input_signature;
|
|
break;
|
|
case VKD3DSPR_OUTCONTROLPOINT:
|
|
range_map = normaliser->output_range_map;
|
|
signature = normaliser->output_signature;
|
|
break;
|
|
case VKD3DSPR_OUTPUT:
|
|
if (!io_normaliser_is_in_fork_or_join_phase(normaliser))
|
|
{
|
|
range_map = normaliser->output_range_map;
|
|
signature = normaliser->output_signature;
|
|
break;
|
|
}
|
|
/* fall through */
|
|
case VKD3DSPR_PATCHCONST:
|
|
range_map = normaliser->pc_range_map;
|
|
signature = normaliser->patch_constant_signature;
|
|
break;
|
|
default:
|
|
/* Validated in the TPF reader. */
|
|
vkd3d_unreachable();
|
|
}
|
|
|
|
reg_idx = reg->idx[reg->idx_count - 1].offset;
|
|
write_mask = range->dst.write_mask;
|
|
element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
|
|
range_map_set_register_range(range_map, reg_idx, range->register_count,
|
|
signature->elements[element_idx].mask, true);
|
|
}
|
|
|
|
static int signature_element_mask_compare(const void *a, const void *b)
|
|
{
|
|
const struct signature_element *e = a, *f = b;
|
|
int ret;
|
|
|
|
return (ret = vkd3d_u32_compare(e->mask, f->mask)) ? ret : vkd3d_u32_compare(e->register_index, f->register_index);
|
|
}
|
|
|
|
static bool sysval_semantics_should_merge(const struct signature_element *e, const struct signature_element *f)
|
|
{
|
|
if (e->sysval_semantic < VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE
|
|
|| e->sysval_semantic > VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN)
|
|
return false;
|
|
|
|
return e->sysval_semantic == f->sysval_semantic
|
|
/* Line detail and density must be merged together to match the SPIR-V array.
|
|
* This deletes one of the two sysvals, but these are not used. */
|
|
|| (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET
|
|
&& f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN)
|
|
|| (e->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN
|
|
&& f->sysval_semantic == VKD3D_SHADER_SV_TESS_FACTOR_LINEDET);
|
|
}
|
|
|
|
/* Merge tess factor sysvals because they are an array in SPIR-V. */
|
|
static void shader_signature_map_patch_constant_index_ranges(struct shader_signature *s,
|
|
uint8_t range_map[][VKD3D_VEC4_SIZE])
|
|
{
|
|
struct signature_element *e, *f;
|
|
unsigned int i, j, register_count;
|
|
|
|
qsort(s->elements, s->element_count, sizeof(s->elements[0]), signature_element_mask_compare);
|
|
|
|
for (i = 0; i < s->element_count; i += register_count)
|
|
{
|
|
e = &s->elements[i];
|
|
register_count = 1;
|
|
|
|
if (!e->sysval_semantic)
|
|
continue;
|
|
|
|
for (j = i + 1; j < s->element_count; ++j, ++register_count)
|
|
{
|
|
f = &s->elements[j];
|
|
if (f->register_index != e->register_index + register_count || !sysval_semantics_should_merge(e, f))
|
|
break;
|
|
}
|
|
if (register_count < 2)
|
|
continue;
|
|
|
|
range_map_set_register_range(range_map, e->register_index, register_count, e->mask, false);
|
|
}
|
|
}
|
|
|
|
static int signature_element_register_compare(const void *a, const void *b)
|
|
{
|
|
const struct signature_element *e = a, *f = b;
|
|
|
|
return vkd3d_u32_compare(e->register_index, f->register_index);
|
|
}
|
|
|
|
static int signature_element_index_compare(const void *a, const void *b)
|
|
{
|
|
const struct signature_element *e = a, *f = b;
|
|
|
|
return vkd3d_u32_compare(e->sort_index, f->sort_index);
|
|
}
|
|
|
|
static unsigned int signature_element_range_expand_mask(struct signature_element *e, unsigned int register_count,
|
|
uint8_t range_map[][VKD3D_VEC4_SIZE])
|
|
{
|
|
unsigned int i, j, component_idx, component_count, merged_write_mask = e->mask;
|
|
|
|
/* dcl_indexrange instructions can declare a subset of the full mask, and the masks of
|
|
* the elements within the range may differ. TPF's handling of arrayed inputs with
|
|
* dcl_indexrange is really just a hack. Here we create a mask which covers all element
|
|
* masks, and check for collisions with other ranges. */
|
|
|
|
for (i = 1; i < register_count; ++i)
|
|
merged_write_mask |= e[i].mask;
|
|
|
|
if (merged_write_mask == e->mask)
|
|
return merged_write_mask;
|
|
|
|
/* Reaching this point is very rare to begin with, and collisions are even rarer or
|
|
* impossible. If the latter shows up, the fallback in shader_signature_find_element_for_reg()
|
|
* may be sufficient. */
|
|
|
|
component_idx = vsir_write_mask_get_component_idx(e->mask);
|
|
component_count = vsir_write_mask_component_count(e->mask);
|
|
|
|
for (i = e->register_index; i < e->register_index + register_count; ++i)
|
|
{
|
|
for (j = 0; j < component_idx; ++j)
|
|
if (range_map[i][j])
|
|
break;
|
|
for (j = component_idx + component_count; j < VKD3D_VEC4_SIZE; ++j)
|
|
if (range_map[i][j])
|
|
break;
|
|
}
|
|
|
|
if (i == register_count)
|
|
{
|
|
WARN("Expanding mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask,
|
|
e->semantic_name, e->register_index, register_count);
|
|
return merged_write_mask;
|
|
}
|
|
|
|
WARN("Cannot expand mask %#x to %#x for %s, base reg %u, count %u.\n", e->mask, merged_write_mask,
|
|
e->semantic_name, e->register_index, register_count);
|
|
return e->mask;
|
|
}
|
|
|
|
static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map[][VKD3D_VEC4_SIZE],
|
|
bool is_patch_constant)
|
|
{
|
|
unsigned int i, j, element_count, new_count, register_count;
|
|
struct signature_element *elements;
|
|
struct signature_element *e, *f;
|
|
bool used;
|
|
|
|
element_count = s->element_count;
|
|
if (!(elements = vkd3d_malloc(element_count * sizeof(*elements))))
|
|
return false;
|
|
memcpy(elements, s->elements, element_count * sizeof(*elements));
|
|
|
|
qsort(elements, element_count, sizeof(elements[0]), signature_element_register_compare);
|
|
|
|
for (i = 0, new_count = 0; i < element_count; i = j, elements[new_count++] = *e)
|
|
{
|
|
e = &elements[i];
|
|
j = i + 1;
|
|
|
|
if (e->register_index == ~0u)
|
|
continue;
|
|
|
|
/* Do not merge if the register index will be relative-addressed. */
|
|
if (range_map_get_register_count(range_map, e->register_index, e->mask) > 1)
|
|
continue;
|
|
|
|
used = e->used_mask;
|
|
|
|
for (; j < element_count; ++j)
|
|
{
|
|
f = &elements[j];
|
|
|
|
/* Merge different components of the same register unless sysvals are different,
|
|
* or it will be relative-addressed. */
|
|
if (f->register_index != e->register_index || f->sysval_semantic != e->sysval_semantic
|
|
|| range_map_get_register_count(range_map, f->register_index, f->mask) > 1)
|
|
break;
|
|
|
|
TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name,
|
|
e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic);
|
|
assert(!(e->mask & f->mask));
|
|
|
|
e->mask |= f->mask;
|
|
e->used_mask |= f->used_mask;
|
|
e->semantic_index = min(e->semantic_index, f->semantic_index);
|
|
|
|
/* The first element may have no interpolation mode if it is unused. Elements which
|
|
* actually have different interpolation modes are assigned different registers. */
|
|
if (f->used_mask && !used)
|
|
{
|
|
if (e->interpolation_mode && e->interpolation_mode != f->interpolation_mode)
|
|
FIXME("Mismatching interpolation modes %u and %u.\n", e->interpolation_mode, f->interpolation_mode);
|
|
else
|
|
e->interpolation_mode = f->interpolation_mode;
|
|
}
|
|
}
|
|
}
|
|
element_count = new_count;
|
|
vkd3d_free(s->elements);
|
|
s->elements = elements;
|
|
s->element_count = element_count;
|
|
|
|
if (is_patch_constant)
|
|
shader_signature_map_patch_constant_index_ranges(s, range_map);
|
|
|
|
for (i = 0, new_count = 0; i < element_count; i += register_count, elements[new_count++] = *e)
|
|
{
|
|
e = &elements[i];
|
|
register_count = 1;
|
|
|
|
if (e->register_index >= MAX_REG_OUTPUT)
|
|
continue;
|
|
|
|
register_count = range_map_get_register_count(range_map, e->register_index, e->mask);
|
|
assert(register_count != UINT8_MAX);
|
|
register_count += !register_count;
|
|
|
|
if (register_count > 1)
|
|
{
|
|
TRACE("Merging %s, base reg %u, count %u.\n", e->semantic_name, e->register_index, register_count);
|
|
e->register_count = register_count;
|
|
e->mask = signature_element_range_expand_mask(e, register_count, range_map);
|
|
}
|
|
}
|
|
element_count = new_count;
|
|
|
|
/* Restoring the original order is required for sensible trace output. */
|
|
qsort(elements, element_count, sizeof(elements[0]), signature_element_index_compare);
|
|
|
|
s->element_count = element_count;
|
|
|
|
return true;
|
|
}
|
|
|
|
static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg,
|
|
unsigned int id_idx, unsigned int register_index)
|
|
{
|
|
assert(id_idx < ARRAY_SIZE(reg->idx) - 1);
|
|
|
|
/* For a relative-addressed register index, move the id up a slot to separate it from the address,
|
|
* because rel_addr can be replaced with a constant offset in some cases. */
|
|
if (reg->idx[id_idx].rel_addr)
|
|
{
|
|
reg->idx[id_idx + 1].rel_addr = NULL;
|
|
reg->idx[id_idx + 1].offset = reg->idx[id_idx].offset;
|
|
reg->idx[id_idx].offset -= register_index;
|
|
if (id_idx)
|
|
{
|
|
/* idx[id_idx] now contains the array index, which must be moved below the control point id. */
|
|
struct vkd3d_shader_register_index tmp = reg->idx[id_idx];
|
|
reg->idx[id_idx] = reg->idx[id_idx - 1];
|
|
reg->idx[id_idx - 1] = tmp;
|
|
}
|
|
++id_idx;
|
|
}
|
|
/* Otherwise we have no address for the arrayed register, so insert one. This happens e.g. where
|
|
* tessellation level registers are merged into an array because they're an array in SPIR-V. */
|
|
else
|
|
{
|
|
++id_idx;
|
|
memmove(®->idx[1], ®->idx[0], id_idx * sizeof(reg->idx[0]));
|
|
reg->idx[0].rel_addr = NULL;
|
|
reg->idx[0].offset = reg->idx[id_idx].offset - register_index;
|
|
}
|
|
|
|
return id_idx;
|
|
}
|
|
|
|
static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_param, bool is_io_dcl,
|
|
struct io_normaliser *normaliser)
|
|
{
|
|
unsigned int id_idx, reg_idx, write_mask, element_idx;
|
|
struct vkd3d_shader_register *reg = &dst_param->reg;
|
|
struct vkd3d_shader_dst_param **dcl_params;
|
|
const struct shader_signature *signature;
|
|
const struct signature_element *e;
|
|
|
|
switch (reg->type)
|
|
{
|
|
case VKD3DSPR_OUTPUT:
|
|
reg_idx = reg->idx[reg->idx_count - 1].offset;
|
|
if (io_normaliser_is_in_fork_or_join_phase(normaliser))
|
|
{
|
|
signature = normaliser->patch_constant_signature;
|
|
/* Convert patch constant outputs to the patch constant register type to avoid the need
|
|
* to convert compiler symbols when accessed as inputs in a later stage. */
|
|
reg->type = VKD3DSPR_PATCHCONST;
|
|
dcl_params = normaliser->pc_dcl_params;
|
|
}
|
|
else
|
|
{
|
|
signature = normaliser->output_signature;
|
|
dcl_params = normaliser->output_dcl_params;
|
|
}
|
|
break;
|
|
|
|
case VKD3DSPR_PATCHCONST:
|
|
reg_idx = reg->idx[reg->idx_count - 1].offset;
|
|
signature = normaliser->patch_constant_signature;
|
|
dcl_params = normaliser->pc_dcl_params;
|
|
break;
|
|
|
|
case VKD3DSPR_COLOROUT:
|
|
reg_idx = reg->idx[0].offset;
|
|
signature = normaliser->output_signature;
|
|
reg->type = VKD3DSPR_OUTPUT;
|
|
dcl_params = normaliser->output_dcl_params;
|
|
break;
|
|
|
|
case VKD3DSPR_INCONTROLPOINT:
|
|
case VKD3DSPR_INPUT:
|
|
reg_idx = reg->idx[reg->idx_count - 1].offset;
|
|
signature = normaliser->input_signature;
|
|
reg->type = VKD3DSPR_INPUT;
|
|
dcl_params = normaliser->input_dcl_params;
|
|
break;
|
|
|
|
case VKD3DSPR_ATTROUT:
|
|
reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset;
|
|
signature = normaliser->output_signature;
|
|
reg->type = VKD3DSPR_OUTPUT;
|
|
dcl_params = normaliser->output_dcl_params;
|
|
break;
|
|
|
|
case VKD3DSPR_RASTOUT:
|
|
reg_idx = SM1_RASTOUT_REGISTER_OFFSET + reg->idx[0].offset;
|
|
signature = normaliser->output_signature;
|
|
reg->type = VKD3DSPR_OUTPUT;
|
|
dcl_params = normaliser->output_dcl_params;
|
|
break;
|
|
|
|
default:
|
|
return true;
|
|
}
|
|
|
|
id_idx = reg->idx_count - 1;
|
|
write_mask = dst_param->write_mask;
|
|
element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
|
|
e = &signature->elements[element_idx];
|
|
|
|
dst_param->write_mask >>= vsir_write_mask_get_component_idx(e->mask);
|
|
if (is_io_dcl)
|
|
{
|
|
/* Validated in the TPF reader. */
|
|
assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params));
|
|
|
|
if (dcl_params[element_idx])
|
|
{
|
|
/* Merge split declarations into a single one. */
|
|
dcl_params[element_idx]->write_mask |= dst_param->write_mask;
|
|
/* Turn this into a nop. */
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
dcl_params[element_idx] = dst_param;
|
|
}
|
|
}
|
|
|
|
if (io_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT)
|
|
{
|
|
if (is_io_dcl)
|
|
{
|
|
/* Emit an array size for the control points for consistency with inputs. */
|
|
reg->idx[0].offset = normaliser->output_control_point_count;
|
|
}
|
|
else
|
|
{
|
|
/* The control point id param. */
|
|
assert(reg->idx[0].rel_addr);
|
|
}
|
|
id_idx = 1;
|
|
}
|
|
|
|
if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic)))
|
|
{
|
|
if (is_io_dcl)
|
|
{
|
|
/* For control point I/O, idx 0 contains the control point count.
|
|
* Ensure it is moved up to the next slot. */
|
|
reg->idx[id_idx].offset = reg->idx[0].offset;
|
|
reg->idx[0].offset = e->register_count;
|
|
++id_idx;
|
|
}
|
|
else
|
|
{
|
|
id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index);
|
|
}
|
|
}
|
|
|
|
/* Replace the register index with the signature element index */
|
|
reg->idx[id_idx].offset = element_idx;
|
|
reg->idx_count = id_idx + 1;
|
|
|
|
return true;
|
|
}
|
|
|
|
static void shader_src_param_io_normalise(struct vkd3d_shader_src_param *src_param,
|
|
struct io_normaliser *normaliser)
|
|
{
|
|
unsigned int i, id_idx, reg_idx, write_mask, element_idx, component_idx;
|
|
struct vkd3d_shader_register *reg = &src_param->reg;
|
|
const struct shader_signature *signature;
|
|
const struct signature_element *e;
|
|
|
|
/* Input/output registers from one phase can be used as inputs in
|
|
* subsequent phases. Specifically:
|
|
*
|
|
* - Control phase inputs are available as "vicp" in fork and join
|
|
* phases.
|
|
* - Control phase outputs are available as "vocp" in fork and join
|
|
* phases.
|
|
* - Fork phase patch constants are available as "vpc" in join
|
|
* phases.
|
|
*
|
|
* We handle "vicp" here by converting INCONTROLPOINT src registers to
|
|
* type INPUT so they match the control phase declarations. We handle
|
|
* "vocp" by converting OUTCONTROLPOINT registers to type OUTPUT.
|
|
* Merging fork and join phases handles "vpc". */
|
|
|
|
switch (reg->type)
|
|
{
|
|
case VKD3DSPR_PATCHCONST:
|
|
reg_idx = reg->idx[reg->idx_count - 1].offset;
|
|
signature = normaliser->patch_constant_signature;
|
|
break;
|
|
|
|
case VKD3DSPR_INCONTROLPOINT:
|
|
reg->type = VKD3DSPR_INPUT;
|
|
/* fall through */
|
|
case VKD3DSPR_INPUT:
|
|
if (normaliser->major < 3 && normaliser->shader_type == VKD3D_SHADER_TYPE_PIXEL)
|
|
reg_idx = SM1_COLOR_REGISTER_OFFSET + reg->idx[0].offset;
|
|
else
|
|
reg_idx = reg->idx[reg->idx_count - 1].offset;
|
|
signature = normaliser->input_signature;
|
|
break;
|
|
|
|
case VKD3DSPR_OUTCONTROLPOINT:
|
|
reg->type = VKD3DSPR_OUTPUT;
|
|
/* fall through */
|
|
case VKD3DSPR_OUTPUT:
|
|
reg_idx = reg->idx[reg->idx_count - 1].offset;
|
|
signature = normaliser->output_signature;
|
|
break;
|
|
|
|
case VKD3DSPR_TEXTURE:
|
|
if (normaliser->shader_type != VKD3D_SHADER_TYPE_PIXEL)
|
|
return;
|
|
reg->type = VKD3DSPR_INPUT;
|
|
reg_idx = reg->idx[0].offset;
|
|
signature = normaliser->input_signature;
|
|
break;
|
|
|
|
default:
|
|
return;
|
|
}
|
|
|
|
id_idx = reg->idx_count - 1;
|
|
write_mask = VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(src_param->swizzle, 0);
|
|
element_idx = shader_signature_find_element_for_reg(signature, reg_idx, write_mask);
|
|
|
|
e = &signature->elements[element_idx];
|
|
if ((e->register_count > 1 || vsir_sysval_semantic_is_tess_factor(e->sysval_semantic)))
|
|
id_idx = shader_register_normalise_arrayed_addressing(reg, id_idx, e->register_index);
|
|
reg->idx[id_idx].offset = element_idx;
|
|
reg->idx_count = id_idx + 1;
|
|
|
|
if ((component_idx = vsir_write_mask_get_component_idx(e->mask)))
|
|
{
|
|
for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
|
|
if (vsir_swizzle_get_component(src_param->swizzle, i))
|
|
src_param->swizzle -= component_idx << VKD3D_SHADER_SWIZZLE_SHIFT(i);
|
|
}
|
|
}
|
|
|
|
static void shader_instruction_normalise_io_params(struct vkd3d_shader_instruction *ins,
|
|
struct io_normaliser *normaliser)
|
|
{
|
|
struct vkd3d_shader_register *reg;
|
|
unsigned int i;
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_DCL_INPUT:
|
|
if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL)
|
|
{
|
|
reg = &ins->declaration.dst.reg;
|
|
|
|
if (reg->type == VKD3DSPR_OUTCONTROLPOINT)
|
|
normaliser->use_vocp = true;
|
|
|
|
/* We don't need to keep OUTCONTROLPOINT or PATCHCONST input declarations since their
|
|
* equivalents were declared earlier, but INCONTROLPOINT may be the first occurrence. */
|
|
if (reg->type == VKD3DSPR_OUTCONTROLPOINT || reg->type == VKD3DSPR_PATCHCONST)
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
else if (reg->type == VKD3DSPR_INCONTROLPOINT)
|
|
reg->type = VKD3DSPR_INPUT;
|
|
}
|
|
/* fall through */
|
|
case VKD3DSIH_DCL_INPUT_PS:
|
|
case VKD3DSIH_DCL_OUTPUT:
|
|
if (!shader_dst_param_io_normalise(&ins->declaration.dst, true, normaliser))
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
break;
|
|
case VKD3DSIH_DCL_INPUT_SGV:
|
|
case VKD3DSIH_DCL_INPUT_SIV:
|
|
case VKD3DSIH_DCL_INPUT_PS_SGV:
|
|
case VKD3DSIH_DCL_INPUT_PS_SIV:
|
|
case VKD3DSIH_DCL_OUTPUT_SIV:
|
|
if (!shader_dst_param_io_normalise(&ins->declaration.register_semantic.reg, true, normaliser))
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
break;
|
|
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
|
|
case VKD3DSIH_HS_FORK_PHASE:
|
|
case VKD3DSIH_HS_JOIN_PHASE:
|
|
normaliser->phase = ins->handler_idx;
|
|
memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params));
|
|
memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params));
|
|
memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params));
|
|
break;
|
|
default:
|
|
if (vsir_instruction_is_dcl(ins))
|
|
break;
|
|
for (i = 0; i < ins->dst_count; ++i)
|
|
shader_dst_param_io_normalise(&ins->dst[i], false, normaliser);
|
|
for (i = 0; i < ins->src_count; ++i)
|
|
shader_src_param_io_normalise(&ins->src[i], normaliser);
|
|
break;
|
|
}
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program)
|
|
{
|
|
struct io_normaliser normaliser = {program->instructions};
|
|
struct vkd3d_shader_instruction *ins;
|
|
bool has_control_point_phase;
|
|
unsigned int i, j;
|
|
|
|
normaliser.phase = VKD3DSIH_INVALID;
|
|
normaliser.shader_type = program->shader_version.type;
|
|
normaliser.major = program->shader_version.major;
|
|
normaliser.input_signature = &program->input_signature;
|
|
normaliser.output_signature = &program->output_signature;
|
|
normaliser.patch_constant_signature = &program->patch_constant_signature;
|
|
|
|
for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i)
|
|
{
|
|
ins = &program->instructions.elements[i];
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT:
|
|
normaliser.output_control_point_count = ins->declaration.count;
|
|
break;
|
|
case VKD3DSIH_DCL_INDEX_RANGE:
|
|
io_normaliser_add_index_range(&normaliser, ins);
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
break;
|
|
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
|
|
has_control_point_phase = true;
|
|
/* fall through */
|
|
case VKD3DSIH_HS_FORK_PHASE:
|
|
case VKD3DSIH_HS_JOIN_PHASE:
|
|
normaliser.phase = ins->handler_idx;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (normaliser.shader_type == VKD3D_SHADER_TYPE_HULL && !has_control_point_phase)
|
|
{
|
|
/* Inputs and outputs must match for the default phase, so merge ranges must match too. */
|
|
for (i = 0; i < MAX_REG_OUTPUT; ++i)
|
|
{
|
|
for (j = 0; j < VKD3D_VEC4_SIZE; ++j)
|
|
{
|
|
if (!normaliser.input_range_map[i][j] && normaliser.output_range_map[i][j])
|
|
normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j];
|
|
else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j])
|
|
normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j];
|
|
else assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false)
|
|
|| !shader_signature_merge(&program->output_signature, normaliser.output_range_map, false)
|
|
|| !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true))
|
|
{
|
|
program->instructions = normaliser.instructions;
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
normaliser.phase = VKD3DSIH_INVALID;
|
|
for (i = 0; i < normaliser.instructions.count; ++i)
|
|
shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser);
|
|
|
|
program->instructions = normaliser.instructions;
|
|
program->use_vocp = normaliser.use_vocp;
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
struct flat_constant_def
|
|
{
|
|
enum vkd3d_shader_d3dbc_constant_register set;
|
|
uint32_t index;
|
|
uint32_t value[4];
|
|
};
|
|
|
|
/* State of the flat constant normalisation pass: a growable array of collected definitions. */
struct flat_constants_normaliser
{
    /* Definitions collected so far. */
    struct flat_constant_def *defs;
    /* Number of valid entries in defs. */
    size_t def_count;
    /* Capacity of defs, maintained by vkd3d_array_reserve(). */
    size_t defs_capacity;
};
|
|
|
|
static bool get_flat_constant_register_type(const struct vkd3d_shader_register *reg,
|
|
enum vkd3d_shader_d3dbc_constant_register *set, uint32_t *index)
|
|
{
|
|
static const struct
|
|
{
|
|
enum vkd3d_shader_register_type type;
|
|
enum vkd3d_shader_d3dbc_constant_register set;
|
|
uint32_t offset;
|
|
}
|
|
regs[] =
|
|
{
|
|
{VKD3DSPR_CONST, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 0},
|
|
{VKD3DSPR_CONST2, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048},
|
|
{VKD3DSPR_CONST3, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096},
|
|
{VKD3DSPR_CONST4, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144},
|
|
{VKD3DSPR_CONSTINT, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, 0},
|
|
{VKD3DSPR_CONSTBOOL, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, 0},
|
|
};
|
|
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < ARRAY_SIZE(regs); ++i)
|
|
{
|
|
if (reg->type == regs[i].type)
|
|
{
|
|
if (reg->idx[0].rel_addr)
|
|
{
|
|
FIXME("Unhandled relative address.\n");
|
|
return false;
|
|
}
|
|
|
|
*set = regs[i].set;
|
|
*index = regs[i].offset + reg->idx[0].offset;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void shader_register_normalise_flat_constants(struct vkd3d_shader_src_param *param,
|
|
const struct flat_constants_normaliser *normaliser)
|
|
{
|
|
enum vkd3d_shader_d3dbc_constant_register set;
|
|
uint32_t index;
|
|
size_t i, j;
|
|
|
|
if (!get_flat_constant_register_type(¶m->reg, &set, &index))
|
|
return;
|
|
|
|
for (i = 0; i < normaliser->def_count; ++i)
|
|
{
|
|
if (normaliser->defs[i].set == set && normaliser->defs[i].index == index)
|
|
{
|
|
param->reg.type = VKD3DSPR_IMMCONST;
|
|
param->reg.idx_count = 0;
|
|
param->reg.dimension = VSIR_DIMENSION_VEC4;
|
|
for (j = 0; j < 4; ++j)
|
|
param->reg.u.immconst_u32[j] = normaliser->defs[i].value[j];
|
|
return;
|
|
}
|
|
}
|
|
|
|
param->reg.type = VKD3DSPR_CONSTBUFFER;
|
|
param->reg.idx[0].offset = set; /* register ID */
|
|
param->reg.idx[1].offset = set; /* register index */
|
|
param->reg.idx[2].offset = index; /* buffer index */
|
|
param->reg.idx_count = 3;
|
|
}
|
|
|
|
static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_program *program)
|
|
{
|
|
struct flat_constants_normaliser normaliser = {0};
|
|
unsigned int i, j;
|
|
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
|
|
|
if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB)
|
|
{
|
|
struct flat_constant_def *def;
|
|
|
|
if (!vkd3d_array_reserve((void **)&normaliser.defs, &normaliser.defs_capacity,
|
|
normaliser.def_count + 1, sizeof(*normaliser.defs)))
|
|
{
|
|
vkd3d_free(normaliser.defs);
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
def = &normaliser.defs[normaliser.def_count++];
|
|
|
|
get_flat_constant_register_type((struct vkd3d_shader_register *)&ins->dst[0].reg, &def->set, &def->index);
|
|
for (j = 0; j < 4; ++j)
|
|
def->value[j] = ins->src[0].reg.u.immconst_u32[j];
|
|
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
}
|
|
else
|
|
{
|
|
for (j = 0; j < ins->src_count; ++j)
|
|
shader_register_normalise_flat_constants(&ins->src[j], &normaliser);
|
|
}
|
|
}
|
|
|
|
vkd3d_free(normaliser.defs);
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static void remove_dead_code(struct vsir_program *program)
|
|
{
|
|
size_t i, depth = 0;
|
|
bool dead = false;
|
|
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_IF:
|
|
case VKD3DSIH_LOOP:
|
|
case VKD3DSIH_SWITCH:
|
|
if (dead)
|
|
{
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
++depth;
|
|
}
|
|
break;
|
|
|
|
case VKD3DSIH_ENDIF:
|
|
case VKD3DSIH_ENDLOOP:
|
|
case VKD3DSIH_ENDSWITCH:
|
|
case VKD3DSIH_ELSE:
|
|
if (dead)
|
|
{
|
|
if (depth > 0)
|
|
{
|
|
if (ins->handler_idx != VKD3DSIH_ELSE)
|
|
--depth;
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
}
|
|
else
|
|
{
|
|
dead = false;
|
|
}
|
|
}
|
|
break;
|
|
|
|
/* `depth' is counted with respect to where the dead code
|
|
* segment began. So it starts at zero and it signals the
|
|
* termination of the dead code segment when it would
|
|
* become negative. */
|
|
case VKD3DSIH_BREAK:
|
|
case VKD3DSIH_RET:
|
|
case VKD3DSIH_CONTINUE:
|
|
if (dead)
|
|
{
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
}
|
|
else
|
|
{
|
|
dead = true;
|
|
depth = 0;
|
|
}
|
|
break;
|
|
|
|
/* If `case' or `default' appears at zero depth, it means
|
|
* that they are a possible target for the corresponding
|
|
* switch, so the code is live again. */
|
|
case VKD3DSIH_CASE:
|
|
case VKD3DSIH_DEFAULT:
|
|
if (dead)
|
|
{
|
|
if (depth == 0)
|
|
dead = false;
|
|
else
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
}
|
|
break;
|
|
|
|
/* Phase instructions can only appear in hull shaders and
|
|
* outside of any block. When a phase returns, control is
|
|
* moved to the following phase, so they make code live
|
|
* again. */
|
|
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
|
|
case VKD3DSIH_HS_FORK_PHASE:
|
|
case VKD3DSIH_HS_JOIN_PHASE:
|
|
dead = false;
|
|
break;
|
|
|
|
default:
|
|
if (dead)
|
|
vkd3d_shader_instruction_make_nop(ins);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program,
|
|
struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
|
struct vkd3d_shader_src_param *srcs;
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_TEX:
|
|
if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
memset(srcs, 0, sizeof(*srcs) * 3);
|
|
|
|
ins->handler_idx = VKD3DSIH_SAMPLE;
|
|
|
|
srcs[0] = ins->src[0];
|
|
|
|
srcs[1].reg.type = VKD3DSPR_RESOURCE;
|
|
srcs[1].reg.idx[0] = ins->src[1].reg.idx[0];
|
|
srcs[1].reg.idx[1] = ins->src[1].reg.idx[0];
|
|
srcs[1].reg.idx_count = 2;
|
|
srcs[1].reg.data_type = VKD3D_DATA_RESOURCE;
|
|
srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE;
|
|
|
|
srcs[2].reg.type = VKD3DSPR_SAMPLER;
|
|
srcs[2].reg.idx[0] = ins->src[1].reg.idx[0];
|
|
srcs[2].reg.idx[1] = ins->src[1].reg.idx[0];
|
|
srcs[2].reg.idx_count = 2;
|
|
srcs[2].reg.data_type = VKD3D_DATA_SAMPLER;
|
|
|
|
ins->src = srcs;
|
|
ins->src_count = 3;
|
|
break;
|
|
|
|
case VKD3DSIH_TEXBEM:
|
|
case VKD3DSIH_TEXBEML:
|
|
case VKD3DSIH_TEXCOORD:
|
|
case VKD3DSIH_TEXDEPTH:
|
|
case VKD3DSIH_TEXDP3:
|
|
case VKD3DSIH_TEXDP3TEX:
|
|
case VKD3DSIH_TEXLDD:
|
|
case VKD3DSIH_TEXLDL:
|
|
case VKD3DSIH_TEXM3x2PAD:
|
|
case VKD3DSIH_TEXM3x2TEX:
|
|
case VKD3DSIH_TEXM3x3DIFF:
|
|
case VKD3DSIH_TEXM3x3PAD:
|
|
case VKD3DSIH_TEXM3x3SPEC:
|
|
case VKD3DSIH_TEXM3x3TEX:
|
|
case VKD3DSIH_TEXM3x3VSPEC:
|
|
case VKD3DSIH_TEXREG2AR:
|
|
case VKD3DSIH_TEXREG2GB:
|
|
case VKD3DSIH_TEXREG2RGB:
|
|
vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
|
|
"Aborting due to not yet implemented feature: "
|
|
"Combined sampler instruction %#x.", ins->handler_idx);
|
|
return VKD3D_ERROR_NOT_IMPLEMENTED;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
/* Control flow flattener state for an `if' construct.
 * NOTE(review): the users of these fields are outside this chunk; the field comments below are
 * derived from the names only and should be confirmed. */
struct cf_flattener_if_info
{
    /* Presumably the source param of the branch which receives the false/else target. */
    struct vkd3d_shader_src_param *false_param;
    /* Identifier of this `if' construct. */
    unsigned int id;
    /* Block id of the merge block. */
    uint32_t merge_block_id;
    /* Block id of the else block. */
    unsigned int else_block_id;
};
|
|
|
|
/* Control flow flattener state for a loop: the block ids of its header, continue and merge
 * blocks. */
struct cf_flattener_loop_info
{
    unsigned int header_block_id;
    unsigned int continue_block_id;
    uint32_t merge_block_id;
};
|
|
|
|
/* A single switch case: a case value paired with a block id. */
struct cf_flattener_switch_case
{
    unsigned int value;
    unsigned int block_id;
};
|
|
|
|
/* Control flow flattener state for a `switch' construct.
 * NOTE(review): the users of these fields are outside this chunk; the field comments below are
 * derived from the names only and should be confirmed. */
struct cf_flattener_switch_info
{
    /* Presumably the index of the switch instruction in the flattener's instruction array. */
    size_t ins_location;
    /* Source param holding the switch condition. */
    const struct vkd3d_shader_src_param *condition;
    /* Identifier of this `switch' construct. */
    unsigned int id;
    /* Block ids of the merge block and of the default case. */
    unsigned int merge_block_id;
    unsigned int default_block_id;
    /* Array of cases, with what looks like its allocated size and element count. */
    struct cf_flattener_switch_case *cases;
    size_t cases_size;
    unsigned int cases_count;
};
|
|
|
|
struct cf_flattener_info
|
|
{
|
|
union
|
|
{
|
|
struct cf_flattener_if_info if_;
|
|
struct cf_flattener_loop_info loop;
|
|
struct cf_flattener_switch_info switch_;
|
|
} u;
|
|
|
|
enum
|
|
{
|
|
VKD3D_BLOCK_IF,
|
|
VKD3D_BLOCK_LOOP,
|
|
VKD3D_BLOCK_SWITCH,
|
|
} current_block;
|
|
bool inside_block;
|
|
};
|
|
|
|
struct cf_flattener
|
|
{
|
|
struct vsir_program *program;
|
|
|
|
struct vkd3d_shader_location location;
|
|
enum vkd3d_result status;
|
|
|
|
struct vkd3d_shader_instruction *instructions;
|
|
size_t instruction_capacity;
|
|
size_t instruction_count;
|
|
|
|
unsigned int block_id;
|
|
const char **block_names;
|
|
size_t block_name_capacity;
|
|
size_t block_name_count;
|
|
|
|
unsigned int branch_id;
|
|
unsigned int loop_id;
|
|
unsigned int switch_id;
|
|
|
|
unsigned int control_flow_depth;
|
|
struct cf_flattener_info *control_flow_info;
|
|
size_t control_flow_info_size;
|
|
};
|
|
|
|
static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error)
|
|
{
|
|
if (flattener->status != VKD3D_OK)
|
|
return;
|
|
flattener->status = error;
|
|
}
|
|
|
|
static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count)
|
|
{
|
|
if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity,
|
|
flattener->instruction_count + count, sizeof(*flattener->instructions)))
|
|
{
|
|
ERR("Failed to allocate instructions.\n");
|
|
cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
|
|
return NULL;
|
|
}
|
|
return &flattener->instructions[flattener->instruction_count];
|
|
}
|
|
|
|
static bool cf_flattener_copy_instruction(struct cf_flattener *flattener,
|
|
const struct vkd3d_shader_instruction *instruction)
|
|
{
|
|
struct vkd3d_shader_instruction *dst_ins;
|
|
|
|
if (instruction->handler_idx == VKD3DSIH_NOP)
|
|
return true;
|
|
|
|
if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
|
|
return false;
|
|
|
|
*dst_ins = *instruction;
|
|
++flattener->instruction_count;
|
|
return true;
|
|
}
|
|
|
|
static unsigned int cf_flattener_alloc_block_id(struct cf_flattener *flattener)
|
|
{
|
|
return ++flattener->block_id;
|
|
}
|
|
|
|
static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_shader_instruction *ins,
|
|
unsigned int count, struct cf_flattener *flattener)
|
|
{
|
|
struct vkd3d_shader_src_param *params;
|
|
|
|
if (!(params = vsir_program_get_src_params(flattener->program, count)))
|
|
{
|
|
cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
|
|
return NULL;
|
|
}
|
|
ins->src = params;
|
|
ins->src_count = count;
|
|
return params;
|
|
}
|
|
|
|
static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int label_id)
|
|
{
|
|
struct vkd3d_shader_instruction *ins;
|
|
|
|
if (!(ins = cf_flattener_require_space(flattener, 1)))
|
|
return;
|
|
if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program))
|
|
++flattener->instruction_count;
|
|
else
|
|
cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
|
|
}
|
|
|
|
/* For conditional branches, this returns the false target branch parameter. */
|
|
static struct vkd3d_shader_src_param *cf_flattener_emit_branch(struct cf_flattener *flattener,
|
|
unsigned int merge_block_id, unsigned int continue_block_id,
|
|
const struct vkd3d_shader_src_param *condition, unsigned int true_id, unsigned int false_id,
|
|
unsigned int flags)
|
|
{
|
|
struct vkd3d_shader_src_param *src_params, *false_branch_param;
|
|
struct vkd3d_shader_instruction *ins;
|
|
|
|
if (!(ins = cf_flattener_require_space(flattener, 1)))
|
|
return NULL;
|
|
vsir_instruction_init(ins, &flattener->location, VKD3DSIH_BRANCH);
|
|
|
|
if (condition)
|
|
{
|
|
if (!(src_params = instruction_src_params_alloc(ins, 4 + !!continue_block_id, flattener)))
|
|
return NULL;
|
|
src_params[0] = *condition;
|
|
if (flags == VKD3D_SHADER_CONDITIONAL_OP_Z)
|
|
{
|
|
vsir_src_param_init_label(&src_params[1], false_id);
|
|
vsir_src_param_init_label(&src_params[2], true_id);
|
|
false_branch_param = &src_params[1];
|
|
}
|
|
else
|
|
{
|
|
vsir_src_param_init_label(&src_params[1], true_id);
|
|
vsir_src_param_init_label(&src_params[2], false_id);
|
|
false_branch_param = &src_params[2];
|
|
}
|
|
vsir_src_param_init_label(&src_params[3], merge_block_id);
|
|
if (continue_block_id)
|
|
vsir_src_param_init_label(&src_params[4], continue_block_id);
|
|
}
|
|
else
|
|
{
|
|
if (!(src_params = instruction_src_params_alloc(ins, merge_block_id ? 3 : 1, flattener)))
|
|
return NULL;
|
|
vsir_src_param_init_label(&src_params[0], true_id);
|
|
if (merge_block_id)
|
|
{
|
|
/* An unconditional branch may only have merge information for a loop, which
|
|
* must have both a merge block and continue block. */
|
|
vsir_src_param_init_label(&src_params[1], merge_block_id);
|
|
vsir_src_param_init_label(&src_params[2], continue_block_id);
|
|
}
|
|
false_branch_param = NULL;
|
|
}
|
|
|
|
++flattener->instruction_count;
|
|
|
|
return false_branch_param;
|
|
}
|
|
|
|
/* Emit a conditional branch to `true_id' whose other target is a freshly
 * allocated block, which also serves as the merge block, then open that
 * block by emitting its label. */
static void cf_flattener_emit_conditional_branch_and_merge(struct cf_flattener *flattener,
        const struct vkd3d_shader_src_param *condition, unsigned int true_id, unsigned int flags)
{
    const unsigned int fallthrough_id = cf_flattener_alloc_block_id(flattener);

    cf_flattener_emit_branch(flattener, fallthrough_id, 0, condition, true_id, fallthrough_id, flags);
    cf_flattener_emit_label(flattener, fallthrough_id);
}
|
|
|
|
/* Emit a BRANCH with `target_block_id' as its single parameter, i.e. without
 * condition or merge information. */
static void cf_flattener_emit_unconditional_branch(struct cf_flattener *flattener, unsigned int target_block_id)
{
    const unsigned int no_block = 0;

    cf_flattener_emit_branch(flattener, no_block, no_block, NULL, target_block_id, no_block, 0);
}
|
|
|
|
static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_flattener *flattener)
|
|
{
|
|
if (!vkd3d_array_reserve((void **)&flattener->control_flow_info, &flattener->control_flow_info_size,
|
|
flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info)))
|
|
{
|
|
ERR("Failed to allocate control flow info structure.\n");
|
|
cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY);
|
|
return NULL;
|
|
}
|
|
|
|
return &flattener->control_flow_info[flattener->control_flow_depth++];
|
|
}
|
|
|
|
static void cf_flattener_pop_control_flow_level(struct cf_flattener *flattener)
|
|
{
|
|
struct cf_flattener_info *cf_info;
|
|
|
|
cf_info = &flattener->control_flow_info[--flattener->control_flow_depth];
|
|
memset(cf_info, 0, sizeof(*cf_info));
|
|
}
|
|
|
|
static struct cf_flattener_info *cf_flattener_find_innermost_loop(struct cf_flattener *flattener)
|
|
{
|
|
int depth;
|
|
|
|
for (depth = flattener->control_flow_depth - 1; depth >= 0; --depth)
|
|
{
|
|
if (flattener->control_flow_info[depth].current_block == VKD3D_BLOCK_LOOP)
|
|
return &flattener->control_flow_info[depth];
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
static struct cf_flattener_info *cf_flattener_find_innermost_breakable_cf_construct(struct cf_flattener *flattener)
|
|
{
|
|
int depth;
|
|
|
|
for (depth = flattener->control_flow_depth - 1; depth >= 0; --depth)
|
|
{
|
|
if (flattener->control_flow_info[depth].current_block == VKD3D_BLOCK_LOOP
|
|
|| flattener->control_flow_info[depth].current_block == VKD3D_BLOCK_SWITCH)
|
|
return &flattener->control_flow_info[depth];
|
|
}
|
|
|
|
return NULL;
|
|
}
|
|
|
|
/* Store a printf-style formatted name for block `block_id' (1-based) in the
 * flattener's block name table, growing and zero-filling the table as
 * needed. If the table cannot be grown the name is silently dropped. */
static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_flattener *flattener,
        unsigned int block_id, const char *fmt, ...)
{
    const size_t old_count = flattener->block_name_count;
    /* Names are stored at index id - 1. */
    const unsigned int slot = block_id - 1;
    struct vkd3d_string_buffer name;
    size_t new_count;
    va_list ap;

    new_count = max(old_count, slot + 1);
    if (!vkd3d_array_reserve((void **)&flattener->block_names, &flattener->block_name_capacity,
            new_count, sizeof(*flattener->block_names)))
        return;

    /* Entries between the old end of the table and the new one have no name yet. */
    memset(&flattener->block_names[old_count], 0, (new_count - old_count) * sizeof(*flattener->block_names));
    flattener->block_name_count = new_count;

    vkd3d_string_buffer_init(&name);
    va_start(ap, fmt);
    vkd3d_string_buffer_vprintf(&name, fmt, ap);
    va_end(ap);

    /* The table takes over the string buffer's storage. */
    flattener->block_names[slot] = name.buffer;
}
|
|
|
|
static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener,
|
|
struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
bool main_block_open, is_hull_shader, after_declarations_section;
|
|
struct vkd3d_shader_instruction_array *instructions;
|
|
struct vsir_program *program = flattener->program;
|
|
struct vkd3d_shader_instruction *dst_ins;
|
|
size_t i;
|
|
|
|
instructions = &program->instructions;
|
|
is_hull_shader = program->shader_version.type == VKD3D_SHADER_TYPE_HULL;
|
|
main_block_open = !is_hull_shader;
|
|
after_declarations_section = is_hull_shader;
|
|
|
|
if (!cf_flattener_require_space(flattener, instructions->count + 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
for (i = 0; i < instructions->count; ++i)
|
|
{
|
|
unsigned int loop_header_block_id, loop_body_block_id, continue_block_id, merge_block_id, true_block_id;
|
|
const struct vkd3d_shader_instruction *instruction = &instructions->elements[i];
|
|
const struct vkd3d_shader_src_param *src = instruction->src;
|
|
struct cf_flattener_info *cf_info;
|
|
|
|
flattener->location = instruction->location;
|
|
|
|
/* Declarations should occur before the first code block, which in hull shaders is marked by the first
|
|
* phase instruction, and in all other shader types begins with the first label instruction.
|
|
* Declaring an indexable temp with function scope is not considered a declaration,
|
|
* because it needs to live inside a function. */
|
|
if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP)
|
|
{
|
|
bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP
|
|
&& instruction->declaration.indexable_temp.has_function_scope;
|
|
|
|
if (!vsir_instruction_is_dcl(instruction) || is_function_indexable)
|
|
{
|
|
after_declarations_section = true;
|
|
cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener));
|
|
}
|
|
}
|
|
|
|
cf_info = flattener->control_flow_depth
|
|
? &flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL;
|
|
|
|
switch (instruction->handler_idx)
|
|
{
|
|
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
|
|
case VKD3DSIH_HS_FORK_PHASE:
|
|
case VKD3DSIH_HS_JOIN_PHASE:
|
|
if (!cf_flattener_copy_instruction(flattener, instruction))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags)
|
|
after_declarations_section = false;
|
|
break;
|
|
|
|
case VKD3DSIH_LABEL:
|
|
vkd3d_shader_error(message_context, &instruction->location,
|
|
VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
|
|
"Aborting due to not yet implemented feature: Label instruction.");
|
|
return VKD3D_ERROR_NOT_IMPLEMENTED;
|
|
|
|
case VKD3DSIH_IF:
|
|
if (!(cf_info = cf_flattener_push_control_flow_level(flattener)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
true_block_id = cf_flattener_alloc_block_id(flattener);
|
|
merge_block_id = cf_flattener_alloc_block_id(flattener);
|
|
cf_info->u.if_.false_param = cf_flattener_emit_branch(flattener, merge_block_id, 0,
|
|
src, true_block_id, merge_block_id, instruction->flags);
|
|
if (!cf_info->u.if_.false_param)
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
cf_flattener_emit_label(flattener, true_block_id);
|
|
|
|
cf_info->u.if_.id = flattener->branch_id;
|
|
cf_info->u.if_.merge_block_id = merge_block_id;
|
|
cf_info->u.if_.else_block_id = 0;
|
|
cf_info->inside_block = true;
|
|
cf_info->current_block = VKD3D_BLOCK_IF;
|
|
|
|
cf_flattener_create_block_name(flattener, merge_block_id, "branch%u_merge", flattener->branch_id);
|
|
cf_flattener_create_block_name(flattener, true_block_id, "branch%u_true", flattener->branch_id);
|
|
++flattener->branch_id;
|
|
break;
|
|
|
|
case VKD3DSIH_ELSE:
|
|
if (cf_info->inside_block)
|
|
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.if_.merge_block_id);
|
|
|
|
cf_info->u.if_.else_block_id = cf_flattener_alloc_block_id(flattener);
|
|
cf_info->u.if_.false_param->reg.idx[0].offset = cf_info->u.if_.else_block_id;
|
|
|
|
cf_flattener_create_block_name(flattener,
|
|
cf_info->u.if_.else_block_id, "branch%u_false", cf_info->u.if_.id);
|
|
cf_flattener_emit_label(flattener, cf_info->u.if_.else_block_id);
|
|
|
|
cf_info->inside_block = true;
|
|
break;
|
|
|
|
case VKD3DSIH_ENDIF:
|
|
if (cf_info->inside_block)
|
|
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.if_.merge_block_id);
|
|
|
|
cf_flattener_emit_label(flattener, cf_info->u.if_.merge_block_id);
|
|
|
|
cf_flattener_pop_control_flow_level(flattener);
|
|
break;
|
|
|
|
case VKD3DSIH_LOOP:
|
|
if (!(cf_info = cf_flattener_push_control_flow_level(flattener)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
loop_header_block_id = cf_flattener_alloc_block_id(flattener);
|
|
loop_body_block_id = cf_flattener_alloc_block_id(flattener);
|
|
continue_block_id = cf_flattener_alloc_block_id(flattener);
|
|
merge_block_id = cf_flattener_alloc_block_id(flattener);
|
|
|
|
cf_flattener_emit_unconditional_branch(flattener, loop_header_block_id);
|
|
cf_flattener_emit_label(flattener, loop_header_block_id);
|
|
cf_flattener_emit_branch(flattener, merge_block_id, continue_block_id,
|
|
NULL, loop_body_block_id, 0, 0);
|
|
|
|
cf_flattener_emit_label(flattener, loop_body_block_id);
|
|
|
|
cf_info->u.loop.header_block_id = loop_header_block_id;
|
|
cf_info->u.loop.continue_block_id = continue_block_id;
|
|
cf_info->u.loop.merge_block_id = merge_block_id;
|
|
cf_info->current_block = VKD3D_BLOCK_LOOP;
|
|
cf_info->inside_block = true;
|
|
|
|
cf_flattener_create_block_name(flattener, loop_header_block_id, "loop%u_header", flattener->loop_id);
|
|
cf_flattener_create_block_name(flattener, loop_body_block_id, "loop%u_body", flattener->loop_id);
|
|
cf_flattener_create_block_name(flattener, continue_block_id, "loop%u_continue", flattener->loop_id);
|
|
cf_flattener_create_block_name(flattener, merge_block_id, "loop%u_merge", flattener->loop_id);
|
|
++flattener->loop_id;
|
|
break;
|
|
|
|
case VKD3DSIH_ENDLOOP:
|
|
if (cf_info->inside_block)
|
|
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.loop.continue_block_id);
|
|
|
|
cf_flattener_emit_label(flattener, cf_info->u.loop.continue_block_id);
|
|
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.loop.header_block_id);
|
|
cf_flattener_emit_label(flattener, cf_info->u.loop.merge_block_id);
|
|
|
|
cf_flattener_pop_control_flow_level(flattener);
|
|
break;
|
|
|
|
case VKD3DSIH_SWITCH:
|
|
if (!(cf_info = cf_flattener_push_control_flow_level(flattener)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
merge_block_id = cf_flattener_alloc_block_id(flattener);
|
|
|
|
cf_info->u.switch_.ins_location = flattener->instruction_count;
|
|
cf_info->u.switch_.condition = src;
|
|
|
|
if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
vsir_instruction_init(dst_ins, &instruction->location, VKD3DSIH_SWITCH_MONOLITHIC);
|
|
++flattener->instruction_count;
|
|
|
|
cf_info->u.switch_.id = flattener->switch_id;
|
|
cf_info->u.switch_.merge_block_id = merge_block_id;
|
|
cf_info->u.switch_.cases = NULL;
|
|
cf_info->u.switch_.cases_size = 0;
|
|
cf_info->u.switch_.cases_count = 0;
|
|
cf_info->u.switch_.default_block_id = 0;
|
|
cf_info->inside_block = false;
|
|
cf_info->current_block = VKD3D_BLOCK_SWITCH;
|
|
|
|
cf_flattener_create_block_name(flattener, merge_block_id, "switch%u_merge", flattener->switch_id);
|
|
++flattener->switch_id;
|
|
|
|
if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.cases, &cf_info->u.switch_.cases_size,
|
|
10, sizeof(*cf_info->u.switch_.cases)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
break;
|
|
|
|
case VKD3DSIH_ENDSWITCH:
|
|
{
|
|
struct vkd3d_shader_src_param *src_params;
|
|
unsigned int j;
|
|
|
|
if (!cf_info->u.switch_.default_block_id)
|
|
cf_info->u.switch_.default_block_id = cf_info->u.switch_.merge_block_id;
|
|
|
|
cf_flattener_emit_label(flattener, cf_info->u.switch_.merge_block_id);
|
|
|
|
/* The SWITCH instruction is completed when the endswitch
|
|
* instruction is processed because we do not know the number
|
|
* of case statements or the default block id in advance.*/
|
|
dst_ins = &flattener->instructions[cf_info->u.switch_.ins_location];
|
|
if (!(src_params = instruction_src_params_alloc(dst_ins, cf_info->u.switch_.cases_count * 2 + 3, flattener)))
|
|
{
|
|
vkd3d_free(cf_info->u.switch_.cases);
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
src_params[0] = *cf_info->u.switch_.condition;
|
|
vsir_src_param_init_label(&src_params[1], cf_info->u.switch_.default_block_id);
|
|
vsir_src_param_init_label(&src_params[2], cf_info->u.switch_.merge_block_id);
|
|
for (j = 0; j < cf_info->u.switch_.cases_count; ++j)
|
|
{
|
|
unsigned int index = j * 2 + 3;
|
|
vsir_src_param_init(&src_params[index], VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0);
|
|
src_params[index].reg.u.immconst_u32[0] = cf_info->u.switch_.cases[j].value;
|
|
vsir_src_param_init_label(&src_params[index + 1], cf_info->u.switch_.cases[j].block_id);
|
|
}
|
|
vkd3d_free(cf_info->u.switch_.cases);
|
|
|
|
cf_flattener_pop_control_flow_level(flattener);
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_CASE:
|
|
{
|
|
unsigned int label_id, value;
|
|
|
|
if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X))
|
|
{
|
|
WARN("Unexpected src swizzle %#x.\n", src->swizzle);
|
|
vkd3d_shader_error(message_context, &instruction->location,
|
|
VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE,
|
|
"The swizzle for a switch case value is not scalar X.");
|
|
cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER);
|
|
}
|
|
value = *src->reg.u.immconst_u32;
|
|
|
|
if (!vkd3d_array_reserve((void **)&cf_info->u.switch_.cases, &cf_info->u.switch_.cases_size,
|
|
cf_info->u.switch_.cases_count + 1, sizeof(*cf_info->u.switch_.cases)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
label_id = cf_flattener_alloc_block_id(flattener);
|
|
if (cf_info->inside_block) /* fall-through */
|
|
cf_flattener_emit_unconditional_branch(flattener, label_id);
|
|
|
|
cf_info->u.switch_.cases[cf_info->u.switch_.cases_count].value = value;
|
|
cf_info->u.switch_.cases[cf_info->u.switch_.cases_count].block_id = label_id;
|
|
++cf_info->u.switch_.cases_count;
|
|
|
|
cf_flattener_emit_label(flattener, label_id);
|
|
cf_flattener_create_block_name(flattener, label_id, "switch%u_case%u", cf_info->u.switch_.id, value);
|
|
cf_info->inside_block = true;
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_DEFAULT:
|
|
cf_info->u.switch_.default_block_id = cf_flattener_alloc_block_id(flattener);
|
|
if (cf_info->inside_block) /* fall-through */
|
|
cf_flattener_emit_unconditional_branch(flattener, cf_info->u.switch_.default_block_id);
|
|
|
|
cf_flattener_emit_label(flattener, cf_info->u.switch_.default_block_id);
|
|
|
|
cf_flattener_create_block_name(flattener, cf_info->u.switch_.default_block_id,
|
|
"switch%u_default", cf_info->u.switch_.id);
|
|
cf_info->inside_block = true;
|
|
break;
|
|
|
|
case VKD3DSIH_BREAK:
|
|
{
|
|
struct cf_flattener_info *breakable_cf_info;
|
|
|
|
if (!(breakable_cf_info = cf_flattener_find_innermost_breakable_cf_construct(flattener)))
|
|
{
|
|
FIXME("Unhandled break instruction.\n");
|
|
return VKD3D_ERROR_INVALID_SHADER;
|
|
}
|
|
|
|
if (breakable_cf_info->current_block == VKD3D_BLOCK_LOOP)
|
|
{
|
|
cf_flattener_emit_unconditional_branch(flattener, breakable_cf_info->u.loop.merge_block_id);
|
|
}
|
|
else if (breakable_cf_info->current_block == VKD3D_BLOCK_SWITCH)
|
|
{
|
|
cf_flattener_emit_unconditional_branch(flattener, breakable_cf_info->u.switch_.merge_block_id);
|
|
}
|
|
|
|
cf_info->inside_block = false;
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_BREAKP:
|
|
{
|
|
struct cf_flattener_info *loop_cf_info;
|
|
|
|
if (!(loop_cf_info = cf_flattener_find_innermost_loop(flattener)))
|
|
{
|
|
ERR("Invalid 'breakc' instruction outside loop.\n");
|
|
return VKD3D_ERROR_INVALID_SHADER;
|
|
}
|
|
|
|
cf_flattener_emit_conditional_branch_and_merge(flattener,
|
|
src, loop_cf_info->u.loop.merge_block_id, instruction->flags);
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_CONTINUE:
|
|
{
|
|
struct cf_flattener_info *loop_cf_info;
|
|
|
|
if (!(loop_cf_info = cf_flattener_find_innermost_loop(flattener)))
|
|
{
|
|
ERR("Invalid 'continue' instruction outside loop.\n");
|
|
return VKD3D_ERROR_INVALID_SHADER;
|
|
}
|
|
|
|
cf_flattener_emit_unconditional_branch(flattener, loop_cf_info->u.loop.continue_block_id);
|
|
|
|
cf_info->inside_block = false;
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_CONTINUEP:
|
|
{
|
|
struct cf_flattener_info *loop_cf_info;
|
|
|
|
if (!(loop_cf_info = cf_flattener_find_innermost_loop(flattener)))
|
|
{
|
|
ERR("Invalid 'continuec' instruction outside loop.\n");
|
|
return VKD3D_ERROR_INVALID_SHADER;
|
|
}
|
|
|
|
cf_flattener_emit_conditional_branch_and_merge(flattener,
|
|
src, loop_cf_info->u.loop.continue_block_id, instruction->flags);
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_RET:
|
|
if (!cf_flattener_copy_instruction(flattener, instruction))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
if (cf_info)
|
|
cf_info->inside_block = false;
|
|
else
|
|
main_block_open = false;
|
|
break;
|
|
|
|
default:
|
|
if (!cf_flattener_copy_instruction(flattener, instruction))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (main_block_open)
|
|
{
|
|
if (!(dst_ins = cf_flattener_require_space(flattener, 1)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
vsir_instruction_init(dst_ins, &flattener->location, VKD3DSIH_RET);
|
|
++flattener->instruction_count;
|
|
}
|
|
|
|
return flattener->status;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program,
|
|
struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
struct cf_flattener flattener = {.program = program};
|
|
enum vkd3d_result result;
|
|
|
|
if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0)
|
|
{
|
|
vkd3d_free(program->instructions.elements);
|
|
program->instructions.elements = flattener.instructions;
|
|
program->instructions.capacity = flattener.instruction_capacity;
|
|
program->instructions.count = flattener.instruction_count;
|
|
program->block_count = flattener.block_id;
|
|
}
|
|
else
|
|
{
|
|
vkd3d_free(flattener.instructions);
|
|
}
|
|
|
|
vkd3d_free(flattener.control_flow_info);
|
|
/* Simpler to always free these in vsir_program_cleanup(). */
|
|
program->block_names = flattener.block_names;
|
|
program->block_name_count = flattener.block_name_count;
|
|
|
|
return result;
|
|
}
|
|
|
|
static unsigned int label_from_src_param(const struct vkd3d_shader_src_param *param)
|
|
{
|
|
assert(param->reg.type == VKD3DSPR_LABEL);
|
|
return param->reg.idx[0].offset;
|
|
}
|
|
|
|
static bool reserve_instructions(struct vkd3d_shader_instruction **instructions, size_t *capacity, size_t count)
|
|
{
|
|
if (!vkd3d_array_reserve((void **)instructions, capacity, count, sizeof(**instructions)))
|
|
{
|
|
ERR("Failed to allocate instructions.\n");
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/* A record describes one edge rewritten by the switch lowering: the jump
 * which used to go from block `switch_label' to block `target_label' now
 * goes from block `if_label' to block `target_label'. */
struct lower_switch_to_if_ladder_block_mapping
{
    /* Block which contained the original switch. */
    unsigned int switch_label;
    /* Block of the if ladder which now performs the jump. */
    unsigned int if_label;
    /* Destination of the jump. */
    unsigned int target_label;
};
|
|
|
|
static bool lower_switch_to_if_ladder_add_block_mapping(struct lower_switch_to_if_ladder_block_mapping **block_map,
|
|
size_t *map_capacity, size_t *map_count, unsigned int switch_label, unsigned int if_label, unsigned int target_label)
|
|
{
|
|
if (!vkd3d_array_reserve((void **)block_map, map_capacity, *map_count + 1, sizeof(**block_map)))
|
|
{
|
|
ERR("Failed to allocate block mapping.\n");
|
|
return false;
|
|
}
|
|
|
|
(*block_map)[*map_count].switch_label = switch_label;
|
|
(*block_map)[*map_count].if_label = if_label;
|
|
(*block_map)[*map_count].target_label = target_label;
|
|
|
|
*map_count += 1;
|
|
|
|
return true;
|
|
}
|
|
|
|
static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program)
|
|
{
|
|
unsigned int block_count = program->block_count, ssa_count = program->ssa_count, current_label = 0, if_label;
|
|
size_t ins_capacity = 0, ins_count = 0, i, map_capacity = 0, map_count = 0;
|
|
struct vkd3d_shader_instruction *instructions = NULL;
|
|
struct lower_switch_to_if_ladder_block_mapping *block_map = NULL;
|
|
|
|
if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count))
|
|
goto fail;
|
|
|
|
/* First subpass: convert SWITCH_MONOLITHIC instructions to
|
|
* selection ladders, keeping a map between blocks before and
|
|
* after the subpass. */
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
|
unsigned int case_count, j, default_label;
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_LABEL:
|
|
current_label = label_from_src_param(&ins->src[0]);
|
|
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
|
|
goto fail;
|
|
instructions[ins_count++] = *ins;
|
|
continue;
|
|
|
|
case VKD3DSIH_SWITCH_MONOLITHIC:
|
|
break;
|
|
|
|
default:
|
|
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
|
|
goto fail;
|
|
instructions[ins_count++] = *ins;
|
|
continue;
|
|
}
|
|
|
|
case_count = (ins->src_count - 3) / 2;
|
|
default_label = label_from_src_param(&ins->src[1]);
|
|
|
|
/* In principle we can have a switch with no cases, and we
|
|
* just have to jump to the default label. */
|
|
if (case_count == 0)
|
|
{
|
|
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1))
|
|
goto fail;
|
|
|
|
if (!vsir_instruction_init_with_params(program, &instructions[ins_count],
|
|
&ins->location, VKD3DSIH_BRANCH, 0, 1))
|
|
goto fail;
|
|
vsir_src_param_init_label(&instructions[ins_count].src[0], default_label);
|
|
++ins_count;
|
|
}
|
|
|
|
if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3 * case_count - 1))
|
|
goto fail;
|
|
|
|
if_label = current_label;
|
|
|
|
for (j = 0; j < case_count; ++j)
|
|
{
|
|
unsigned int fallthrough_label, case_label = label_from_src_param(&ins->src[3 + 2 * j + 1]);
|
|
|
|
if (!vsir_instruction_init_with_params(program,
|
|
&instructions[ins_count], &ins->location, VKD3DSIH_IEQ, 1, 2))
|
|
goto fail;
|
|
dst_param_init_ssa_bool(&instructions[ins_count].dst[0], ssa_count);
|
|
instructions[ins_count].src[0] = ins->src[0];
|
|
instructions[ins_count].src[1] = ins->src[3 + 2 * j];
|
|
++ins_count;
|
|
|
|
/* For all cases except the last one we fall through to
|
|
* the following case; the last one has to jump to the
|
|
* default label. */
|
|
if (j == case_count - 1)
|
|
fallthrough_label = default_label;
|
|
else
|
|
fallthrough_label = block_count + 1;
|
|
|
|
if (!vsir_instruction_init_with_params(program, &instructions[ins_count],
|
|
&ins->location, VKD3DSIH_BRANCH, 0, 3))
|
|
goto fail;
|
|
src_param_init_ssa_bool(&instructions[ins_count].src[0], ssa_count);
|
|
vsir_src_param_init_label(&instructions[ins_count].src[1], case_label);
|
|
vsir_src_param_init_label(&instructions[ins_count].src[2], fallthrough_label);
|
|
++ins_count;
|
|
|
|
++ssa_count;
|
|
|
|
if (!lower_switch_to_if_ladder_add_block_mapping(&block_map, &map_capacity, &map_count,
|
|
current_label, if_label, case_label))
|
|
goto fail;
|
|
|
|
if (j == case_count - 1)
|
|
{
|
|
if (!lower_switch_to_if_ladder_add_block_mapping(&block_map, &map_capacity, &map_count,
|
|
current_label, if_label, default_label))
|
|
goto fail;
|
|
}
|
|
else
|
|
{
|
|
if (!vsir_instruction_init_with_params(program,
|
|
&instructions[ins_count], &ins->location, VKD3DSIH_LABEL, 0, 1))
|
|
goto fail;
|
|
vsir_src_param_init_label(&instructions[ins_count].src[0], ++block_count);
|
|
++ins_count;
|
|
|
|
if_label = block_count;
|
|
}
|
|
}
|
|
}
|
|
|
|
vkd3d_free(program->instructions.elements);
|
|
vkd3d_free(block_map);
|
|
program->instructions.elements = instructions;
|
|
program->instructions.capacity = ins_capacity;
|
|
program->instructions.count = ins_count;
|
|
program->block_count = block_count;
|
|
program->ssa_count = ssa_count;
|
|
|
|
return VKD3D_OK;
|
|
|
|
fail:
|
|
vkd3d_free(instructions);
|
|
vkd3d_free(block_map);
|
|
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
/* Allocation state for converting SSA registers to temps. */
struct ssas_to_temps_alloc
{
    /* Indexed by SSA register index; holds the temp index assigned to that
     * SSA value, or UINT_MAX if none was assigned. */
    unsigned int *table;
    /* Index the next allocated temp will receive. */
    unsigned int next_temp_idx;
};
|
|
|
|
static bool ssas_to_temps_alloc_init(struct ssas_to_temps_alloc *alloc, unsigned int ssa_count, unsigned int temp_count)
|
|
{
|
|
size_t i = ssa_count * sizeof(*alloc->table);
|
|
|
|
if (!(alloc->table = vkd3d_malloc(i)))
|
|
{
|
|
ERR("Failed to allocate SSA table.\n");
|
|
return false;
|
|
}
|
|
memset(alloc->table, 0xff, i);
|
|
|
|
alloc->next_temp_idx = temp_count;
|
|
return true;
|
|
}
|
|
|
|
/* This is idempotent: it can be safely applied more than once on the
|
|
* same register. */
|
|
static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct ssas_to_temps_alloc *alloc,
|
|
struct vkd3d_shader_register *reg)
|
|
{
|
|
unsigned int i;
|
|
|
|
if (reg->type == VKD3DSPR_SSA && alloc->table[reg->idx[0].offset] != UINT_MAX)
|
|
{
|
|
reg->type = VKD3DSPR_TEMP;
|
|
reg->idx[0].offset = alloc->table[reg->idx[0].offset];
|
|
}
|
|
|
|
for (i = 0; i < reg->idx_count; ++i)
|
|
if (reg->idx[i].rel_addr)
|
|
materialize_ssas_to_temps_process_reg(program, alloc, ®->idx[i].rel_addr->reg);
|
|
}
|
|
|
|
/* Per-block list of the phi incomings which originate from that block. */
struct ssas_to_temps_block_info
{
    /* One (source, destination) pair per phi incoming: `src' points at the
     * incoming value inside the PHI instruction and `dst' at the PHI's
     * destination. A MOV from `src' to `dst' is emitted before the block's
     * terminating branch. */
    struct phi_incoming_to_temp
    {
        struct vkd3d_shader_src_param *src;
        struct vkd3d_shader_dst_param *dst;
    } *incomings;
    size_t incoming_capacity;
    size_t incoming_count;
};
|
|
|
|
static void ssas_to_temps_block_info_cleanup(struct ssas_to_temps_block_info *block_info,
|
|
size_t count)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < count; ++i)
|
|
vkd3d_free(block_info[i].incomings);
|
|
|
|
vkd3d_free(block_info);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_program *program)
|
|
{
|
|
size_t ins_capacity = 0, ins_count = 0, phi_count, incoming_count, i;
|
|
struct ssas_to_temps_block_info *info, *block_info = NULL;
|
|
struct vkd3d_shader_instruction *instructions = NULL;
|
|
struct ssas_to_temps_alloc alloc = {0};
|
|
unsigned int current_label = 0;
|
|
|
|
if (!(block_info = vkd3d_calloc(program->block_count, sizeof(*block_info))))
|
|
{
|
|
ERR("Failed to allocate block info array.\n");
|
|
goto fail;
|
|
}
|
|
|
|
if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
|
|
goto fail;
|
|
|
|
for (i = 0, phi_count = 0, incoming_count = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
|
unsigned int j, temp_idx;
|
|
|
|
/* Only phi src/dst SSA values need be converted here. Structurisation may
|
|
* introduce new cases of undominated SSA use, which will be handled later. */
|
|
if (ins->handler_idx != VKD3DSIH_PHI)
|
|
continue;
|
|
++phi_count;
|
|
|
|
temp_idx = alloc.next_temp_idx++;
|
|
|
|
for (j = 0; j < ins->src_count; j += 2)
|
|
{
|
|
struct phi_incoming_to_temp *incoming;
|
|
unsigned int label;
|
|
|
|
label = label_from_src_param(&ins->src[j + 1]);
|
|
assert(label);
|
|
|
|
info = &block_info[label - 1];
|
|
|
|
if (!(vkd3d_array_reserve((void **)&info->incomings, &info->incoming_capacity, info->incoming_count + 1,
|
|
sizeof(*info->incomings))))
|
|
goto fail;
|
|
|
|
incoming = &info->incomings[info->incoming_count++];
|
|
incoming->src = &ins->src[j];
|
|
incoming->dst = ins->dst;
|
|
|
|
alloc.table[ins->dst->reg.idx[0].offset] = temp_idx;
|
|
|
|
++incoming_count;
|
|
}
|
|
|
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst->reg);
|
|
}
|
|
|
|
if (!phi_count)
|
|
goto done;
|
|
|
|
if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count + incoming_count - phi_count))
|
|
goto fail;
|
|
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *mov_ins, *ins = &program->instructions.elements[i];
|
|
size_t j;
|
|
|
|
for (j = 0; j < ins->dst_count; ++j)
|
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
|
|
|
|
for (j = 0; j < ins->src_count; ++j)
|
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
|
|
|
|
switch (ins->handler_idx)
|
|
{
|
|
case VKD3DSIH_LABEL:
|
|
current_label = label_from_src_param(&ins->src[0]);
|
|
break;
|
|
|
|
case VKD3DSIH_BRANCH:
|
|
case VKD3DSIH_SWITCH_MONOLITHIC:
|
|
info = &block_info[current_label - 1];
|
|
|
|
for (j = 0; j < info->incoming_count; ++j)
|
|
{
|
|
struct phi_incoming_to_temp *incoming = &info->incomings[j];
|
|
|
|
mov_ins = &instructions[ins_count++];
|
|
if (!vsir_instruction_init_with_params(program, mov_ins, &ins->location, VKD3DSIH_MOV, 1, 0))
|
|
goto fail;
|
|
*mov_ins->dst = *incoming->dst;
|
|
mov_ins->src = incoming->src;
|
|
mov_ins->src_count = 1;
|
|
}
|
|
break;
|
|
|
|
case VKD3DSIH_PHI:
|
|
continue;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
instructions[ins_count++] = *ins;
|
|
}
|
|
|
|
vkd3d_free(program->instructions.elements);
|
|
program->instructions.elements = instructions;
|
|
program->instructions.capacity = ins_capacity;
|
|
program->instructions.count = ins_count;
|
|
program->temp_count = alloc.next_temp_idx;
|
|
done:
|
|
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
|
vkd3d_free(alloc.table);
|
|
|
|
return VKD3D_OK;
|
|
|
|
fail:
|
|
vkd3d_free(instructions);
|
|
ssas_to_temps_block_info_cleanup(block_info, program->block_count);
|
|
vkd3d_free(alloc.table);
|
|
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
/* A growable array of pointers to blocks. */
struct vsir_block_list
{
    /* Pointer array; `capacity' is tracked for vkd3d_array_reserve(). */
    struct vsir_block **blocks;
    /* Number of valid entries in `blocks'. */
    size_t count;
    size_t capacity;
};
|
|
|
|
static void vsir_block_list_init(struct vsir_block_list *list)
|
|
{
|
|
memset(list, 0, sizeof(*list));
|
|
}
|
|
|
|
static void vsir_block_list_cleanup(struct vsir_block_list *list)
|
|
{
|
|
vkd3d_free(list->blocks);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block)
|
|
{
|
|
if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks)))
|
|
{
|
|
ERR("Cannot extend block list.\n");
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
list->blocks[list->count++] = block;
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
if (block == list->blocks[i])
|
|
return VKD3D_FALSE;
|
|
|
|
return vsir_block_list_add_checked(list, block);
|
|
}
|
|
|
|
/* It is guaranteed that the relative order is kept. */
|
|
static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx)
|
|
{
|
|
--list->count;
|
|
memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks));
|
|
}
|
|
|
|
struct vsir_block
|
|
{
|
|
unsigned int label, order_pos;
|
|
/* `begin' points to the instruction immediately following the
|
|
* LABEL that introduces the block. `end' points to the terminator
|
|
* instruction (either BRANCH or RET). They can coincide, meaning
|
|
* that the block is empty. */
|
|
struct vkd3d_shader_instruction *begin, *end;
|
|
struct vsir_block_list predecessors, successors;
|
|
uint32_t *dominates;
|
|
};
|
|
|
|
static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int label, size_t block_count)
|
|
{
|
|
size_t byte_count;
|
|
|
|
if (block_count > SIZE_MAX - (sizeof(*block->dominates) * CHAR_BIT - 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
block_count = align(block_count, sizeof(*block->dominates) * CHAR_BIT);
|
|
byte_count = block_count / CHAR_BIT;
|
|
|
|
assert(label);
|
|
memset(block, 0, sizeof(*block));
|
|
block->label = label;
|
|
vsir_block_list_init(&block->predecessors);
|
|
vsir_block_list_init(&block->successors);
|
|
|
|
if (!(block->dominates = vkd3d_malloc(byte_count)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
memset(block->dominates, 0xff, byte_count);
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static void vsir_block_cleanup(struct vsir_block *block)
|
|
{
|
|
if (block->label == 0)
|
|
return;
|
|
vsir_block_list_cleanup(&block->predecessors);
|
|
vsir_block_list_cleanup(&block->successors);
|
|
vkd3d_free(block->dominates);
|
|
}
|
|
|
|
static int block_compare(const void *ptr1, const void *ptr2)
|
|
{
|
|
const struct vsir_block *block1 = *(const struct vsir_block **)ptr1;
|
|
const struct vsir_block *block2 = *(const struct vsir_block **)ptr2;
|
|
|
|
return vkd3d_u32_compare(block1->label, block2->label);
|
|
}
|
|
|
|
static void vsir_block_list_sort(struct vsir_block_list *list)
|
|
{
|
|
qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare);
|
|
}
|
|
|
|
static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block)
|
|
{
|
|
return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare);
|
|
}
|
|
|
|
/* A growable array of structures, stored by value. */
struct vsir_cfg_structure_list
{
    struct vsir_cfg_structure *structures;
    /* Number of valid entries in `structures'. */
    size_t count;
    size_t capacity;
    unsigned int end;
};
|
|
|
|
struct vsir_cfg_structure
|
|
{
|
|
enum vsir_cfg_structure_type
|
|
{
|
|
/* Execute a block of the original VSIR program. */
|
|
STRUCTURE_TYPE_BLOCK,
|
|
/* Execute a loop, which is identified by an index. */
|
|
STRUCTURE_TYPE_LOOP,
|
|
/* Execute a selection construct. */
|
|
STRUCTURE_TYPE_SELECTION,
|
|
/* Execute a `return' or a (possibly) multilevel `break' or
|
|
* `continue', targeting a loop by its index. If `condition'
|
|
* is non-NULL, then the jump is conditional (this is
|
|
* currently not allowed for `return'). */
|
|
STRUCTURE_TYPE_JUMP,
|
|
} type;
|
|
union
|
|
{
|
|
struct vsir_block *block;
|
|
struct vsir_cfg_structure_loop
|
|
{
|
|
struct vsir_cfg_structure_list body;
|
|
unsigned idx;
|
|
bool needs_trampoline;
|
|
struct vsir_cfg_structure *outer_loop;
|
|
} loop;
|
|
struct vsir_cfg_structure_selection
|
|
{
|
|
struct vkd3d_shader_src_param *condition;
|
|
struct vsir_cfg_structure_list if_body;
|
|
struct vsir_cfg_structure_list else_body;
|
|
bool invert_condition;
|
|
} selection;
|
|
struct vsir_cfg_structure_jump
|
|
{
|
|
enum vsir_cfg_jump_type
|
|
{
|
|
/* NONE is available as an intermediate value, but it
|
|
* is not allowed in valid structured programs. */
|
|
JUMP_NONE,
|
|
JUMP_BREAK,
|
|
JUMP_CONTINUE,
|
|
JUMP_RET,
|
|
} type;
|
|
unsigned int target;
|
|
struct vkd3d_shader_src_param *condition;
|
|
bool invert_condition;
|
|
bool needs_launcher;
|
|
} jump;
|
|
} u;
|
|
};
|
|
|
|
static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type);
|
|
static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure);
|
|
|
|
static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
vsir_cfg_structure_cleanup(&list->structures[i]);
|
|
vkd3d_free(list->structures);
|
|
}
|
|
|
|
static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list,
|
|
enum vsir_cfg_structure_type type)
|
|
{
|
|
struct vsir_cfg_structure *ret;
|
|
|
|
if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1,
|
|
sizeof(*list->structures)))
|
|
return NULL;
|
|
|
|
ret = &list->structures[list->count++];
|
|
|
|
vsir_cfg_structure_init(ret, type);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_structure_list_append_from_region(struct vsir_cfg_structure_list *list,
|
|
struct vsir_cfg_structure *begin, size_t size)
|
|
{
|
|
if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + size,
|
|
sizeof(*list->structures)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
memcpy(&list->structures[list->count], begin, size * sizeof(*begin));
|
|
|
|
list->count += size;
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type)
|
|
{
|
|
memset(structure, 0, sizeof(*structure));
|
|
structure->type = type;
|
|
}
|
|
|
|
static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure)
|
|
{
|
|
switch (structure->type)
|
|
{
|
|
case STRUCTURE_TYPE_LOOP:
|
|
vsir_cfg_structure_list_cleanup(&structure->u.loop.body);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_SELECTION:
|
|
vsir_cfg_structure_list_cleanup(&structure->u.selection.if_body);
|
|
vsir_cfg_structure_list_cleanup(&structure->u.selection.else_body);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* Destination for emitted instructions: a growable instruction array plus
 * temporary register bookkeeping. */
struct vsir_cfg_emit_target
{
    struct vkd3d_shader_instruction *instructions;
    size_t ins_capacity;
    size_t ins_count;
    unsigned int jump_target_temp_idx;
    unsigned int temp_count;
};
|
|
|
|
struct vsir_cfg
|
|
{
|
|
struct vkd3d_shader_message_context *message_context;
|
|
struct vsir_program *program;
|
|
struct vsir_block *blocks;
|
|
struct vsir_block *entry;
|
|
size_t block_count;
|
|
struct vkd3d_string_buffer debug_buffer;
|
|
|
|
struct vsir_block_list *loops;
|
|
size_t loops_count, loops_capacity;
|
|
size_t *loops_by_header;
|
|
|
|
struct vsir_block_list order;
|
|
struct cfg_loop_interval
|
|
{
|
|
/* `begin' is the position of the first block of the loop in
|
|
* the topological sort; `end' is the position of the first
|
|
* block after the loop. In other words, `begin' is where a
|
|
* `continue' instruction would jump and `end' is where a
|
|
* `break' instruction would jump. */
|
|
unsigned int begin, end;
|
|
/* Each loop interval can be natural or synthetic. Natural
|
|
* intervals are added to represent loops given by CFG back
|
|
* edges. Synthetic intervals do not correspond to loops in
|
|
* the input CFG, but are added to leverage their `break'
|
|
* instruction in order to execute forward edges.
|
|
*
|
|
* For a synthetic loop interval it's not really important
|
|
* which one is the `begin' block, since we don't need to
|
|
* execute `continue' for them. So we have some leeway for
|
|
* moving it provided that these conditions are met: 1. the
|
|
* interval must contain all `break' instructions that target
|
|
* it, which in practice means that `begin' can be moved
|
|
* backward and not forward; 2. intervals must remain properly
|
|
* nested (for each pair of intervals, either one contains the
|
|
* other or they are disjoint).
|
|
*
|
|
* Subject to these conditions, we try to reuse the same loop
|
|
* as much as possible (if many forward edges target the same
|
|
* block), but we still try to keep `begin' as forward as
|
|
* possible, to keep the loop scope as small as possible. */
|
|
bool synthetic;
|
|
/* The number of jump instructions (both conditional and
|
|
* unconditional) that target this loop. */
|
|
unsigned int target_count;
|
|
} *loop_intervals;
|
|
size_t loop_interval_count, loop_interval_capacity;
|
|
|
|
struct vsir_cfg_structure_list structured_program;
|
|
|
|
struct vsir_cfg_emit_target *target;
|
|
};
|
|
|
|
static void vsir_cfg_cleanup(struct vsir_cfg *cfg)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
vsir_block_cleanup(&cfg->blocks[i]);
|
|
|
|
for (i = 0; i < cfg->loops_count; ++i)
|
|
vsir_block_list_cleanup(&cfg->loops[i]);
|
|
|
|
vsir_block_list_cleanup(&cfg->order);
|
|
|
|
vsir_cfg_structure_list_cleanup(&cfg->structured_program);
|
|
|
|
vkd3d_free(cfg->blocks);
|
|
vkd3d_free(cfg->loops);
|
|
vkd3d_free(cfg->loops_by_header);
|
|
vkd3d_free(cfg->loop_intervals);
|
|
|
|
if (TRACE_ON())
|
|
vkd3d_string_buffer_cleanup(&cfg->debug_buffer);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin,
|
|
unsigned int end, bool synthetic)
|
|
{
|
|
struct cfg_loop_interval *interval;
|
|
|
|
if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity,
|
|
cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
interval = &cfg->loop_intervals[cfg->loop_interval_count++];
|
|
|
|
interval->begin = begin;
|
|
interval->end = end;
|
|
interval->synthetic = synthetic;
|
|
interval->target_count = 0;
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2)
|
|
{
|
|
return bitmap_is_set(b1->dominates, b2->label - 1);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block,
|
|
struct vkd3d_shader_src_param *successor_param)
|
|
{
|
|
unsigned int target = label_from_src_param(successor_param);
|
|
struct vsir_block *successor = &cfg->blocks[target - 1];
|
|
enum vkd3d_result ret;
|
|
|
|
assert(successor->label != 0);
|
|
|
|
if ((ret = vsir_block_list_add(&block->successors, successor)) < 0)
|
|
return ret;
|
|
|
|
if ((ret = vsir_block_list_add(&successor->predecessors, block)) < 0)
|
|
return ret;
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static void vsir_cfg_dump_dot(struct vsir_cfg *cfg)
|
|
{
|
|
size_t i, j;
|
|
|
|
TRACE("digraph cfg {\n");
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
const char *shape;
|
|
|
|
if (block->label == 0)
|
|
continue;
|
|
|
|
switch (block->end->handler_idx)
|
|
{
|
|
case VKD3DSIH_RET:
|
|
shape = "trapezium";
|
|
break;
|
|
|
|
case VKD3DSIH_BRANCH:
|
|
shape = vsir_register_is_label(&block->end->src[0].reg) ? "ellipse" : "box";
|
|
break;
|
|
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
|
|
TRACE(" n%u [label=\"%u\", shape=\"%s\"];\n", block->label, block->label, shape);
|
|
|
|
for (j = 0; j < block->successors.count; ++j)
|
|
TRACE(" n%u -> n%u;\n", block->label, block->successors.blocks[j]->label);
|
|
}
|
|
|
|
TRACE("}\n");
|
|
}
|
|
|
|
static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list);
|
|
|
|
static void vsir_cfg_structure_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure *structure)
|
|
{
|
|
switch (structure->type)
|
|
{
|
|
case STRUCTURE_TYPE_BLOCK:
|
|
TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_LOOP:
|
|
TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx);
|
|
|
|
vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body);
|
|
|
|
TRACE("%s} # %u%s\n", cfg->debug_buffer.buffer, structure->u.loop.idx,
|
|
structure->u.loop.needs_trampoline ? ", tramp" : "");
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_SELECTION:
|
|
TRACE("%sif {\n", cfg->debug_buffer.buffer);
|
|
|
|
vsir_cfg_structure_list_dump(cfg, &structure->u.selection.if_body);
|
|
|
|
if (structure->u.selection.else_body.count == 0)
|
|
{
|
|
TRACE("%s}\n", cfg->debug_buffer.buffer);
|
|
}
|
|
else
|
|
{
|
|
TRACE("%s} else {\n", cfg->debug_buffer.buffer);
|
|
|
|
vsir_cfg_structure_list_dump(cfg, &structure->u.selection.else_body);
|
|
|
|
TRACE("%s}\n", cfg->debug_buffer.buffer);
|
|
}
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_JUMP:
|
|
{
|
|
const char *type_str;
|
|
|
|
switch (structure->u.jump.type)
|
|
{
|
|
case JUMP_RET:
|
|
TRACE("%sret\n", cfg->debug_buffer.buffer);
|
|
return;
|
|
|
|
case JUMP_BREAK:
|
|
type_str = "break";
|
|
break;
|
|
|
|
case JUMP_CONTINUE:
|
|
type_str = "continue";
|
|
break;
|
|
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
|
|
TRACE("%s%s%s %u%s\n", cfg->debug_buffer.buffer, type_str,
|
|
structure->u.jump.condition ? "c" : "", structure->u.jump.target,
|
|
structure->u.jump.needs_launcher ? " # launch" : "");
|
|
break;
|
|
}
|
|
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
}
|
|
|
|
static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list)
|
|
{
|
|
unsigned int i;
|
|
|
|
vkd3d_string_buffer_printf(&cfg->debug_buffer, " ");
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
vsir_cfg_structure_dump(cfg, &list->structures[i]);
|
|
|
|
vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2);
|
|
}
|
|
|
|
static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg)
|
|
{
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < cfg->structured_program.count; ++i)
|
|
vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program,
|
|
struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target)
|
|
{
|
|
struct vsir_block *current_block = NULL;
|
|
enum vkd3d_result ret;
|
|
size_t i;
|
|
|
|
memset(cfg, 0, sizeof(*cfg));
|
|
cfg->message_context = message_context;
|
|
cfg->program = program;
|
|
cfg->block_count = program->block_count;
|
|
cfg->target = target;
|
|
|
|
vsir_block_list_init(&cfg->order);
|
|
|
|
if (!(cfg->blocks = vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks))))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
if (TRACE_ON())
|
|
vkd3d_string_buffer_init(&cfg->debug_buffer);
|
|
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i];
|
|
|
|
switch (instruction->handler_idx)
|
|
{
|
|
case VKD3DSIH_PHI:
|
|
case VKD3DSIH_SWITCH_MONOLITHIC:
|
|
vkd3d_unreachable();
|
|
|
|
case VKD3DSIH_LABEL:
|
|
{
|
|
unsigned int label = label_from_src_param(&instruction->src[0]);
|
|
|
|
assert(!current_block);
|
|
assert(label > 0);
|
|
assert(label <= cfg->block_count);
|
|
current_block = &cfg->blocks[label - 1];
|
|
assert(current_block->label == 0);
|
|
if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0)
|
|
goto fail;
|
|
current_block->begin = &program->instructions.elements[i + 1];
|
|
if (!cfg->entry)
|
|
cfg->entry = current_block;
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_BRANCH:
|
|
case VKD3DSIH_RET:
|
|
assert(current_block);
|
|
current_block->end = instruction;
|
|
current_block = NULL;
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
|
|
if (block->label == 0)
|
|
continue;
|
|
|
|
switch (block->end->handler_idx)
|
|
{
|
|
case VKD3DSIH_RET:
|
|
break;
|
|
|
|
case VKD3DSIH_BRANCH:
|
|
if (vsir_register_is_label(&block->end->src[0].reg))
|
|
{
|
|
if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[0])) < 0)
|
|
goto fail;
|
|
}
|
|
else
|
|
{
|
|
if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[1])) < 0)
|
|
goto fail;
|
|
|
|
if ((ret = vsir_cfg_add_edge(cfg, block, &block->end->src[2])) < 0)
|
|
goto fail;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
}
|
|
|
|
if (TRACE_ON())
|
|
vsir_cfg_dump_dot(cfg);
|
|
|
|
return VKD3D_OK;
|
|
|
|
fail:
|
|
vsir_cfg_cleanup(cfg);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Block A dominates block B if every path from the entry point to B
|
|
* must pass through A. Naively compute the set of blocks that are
|
|
* dominated by `reference' by running a graph visit starting from the
|
|
* entry point (which must be the initial value of `current') and
|
|
* avoiding `reference'. Running this for all the blocks takes
|
|
* quadratic time: if in the future something better is sought after,
|
|
* the standard tool seems to be the Lengauer-Tarjan algorithm. */
|
|
static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, struct vsir_block *reference)
|
|
{
|
|
size_t i;
|
|
|
|
assert(current->label != 0);
|
|
|
|
if (current == reference)
|
|
return;
|
|
|
|
if (!bitmap_is_set(reference->dominates, current->label - 1))
|
|
return;
|
|
|
|
bitmap_clear(reference->dominates, current->label - 1);
|
|
|
|
for (i = 0; i < current->successors.count; ++i)
|
|
vsir_cfg_compute_dominators_recurse(current->successors.blocks[i], reference);
|
|
}
|
|
|
|
static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg)
|
|
{
|
|
size_t i, j;
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
|
|
if (block->label == 0)
|
|
continue;
|
|
|
|
vsir_cfg_compute_dominators_recurse(cfg->entry, block);
|
|
|
|
if (TRACE_ON())
|
|
{
|
|
vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label);
|
|
for (j = 0; j < cfg->block_count; j++)
|
|
{
|
|
struct vsir_block *block2 = &cfg->blocks[j];
|
|
|
|
if (block2->label == 0)
|
|
continue;
|
|
|
|
if (vsir_block_dominates(block, block2))
|
|
vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label);
|
|
}
|
|
TRACE("%s\n", cfg->debug_buffer.buffer);
|
|
vkd3d_string_buffer_clear(&cfg->debug_buffer);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* A back edge is an edge X -> Y for which block Y dominates block
|
|
* X. All the other edges are forward edges, and it is required that
|
|
* the input CFG is reducible, i.e., it is acyclic once you strip away
|
|
* the back edges.
|
|
*
|
|
* Each back edge X -> Y defines a loop: block X is the header block,
|
|
* block Y is the back edge block, and the loop consists of all the
|
|
* blocks which are dominated by the header block and have a path to
|
|
* the back edge block that doesn't pass through the header block
|
|
* (including the header block itself). It can be proved that all the
|
|
* blocks in such a path (connecting a loop block to the back edge
|
|
* block without passing through the header block) belong to the same
|
|
* loop.
|
|
*
|
|
* If the input CFG is reducible its loops are properly nested (i.e.,
|
|
* each two loops are either disjoint or one is contained in the
|
|
* other), provided that each block has at most one incoming back
|
|
* edge. If this condition does not hold, a synthetic block can be
|
|
* introduced as the only back edge block for the given header block,
|
|
* with all the previous back edge now being forward edges to the
|
|
* synthetic block. This is not currently implemented (but it is
|
|
* rarely found in practice anyway). */
|
|
static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block,
|
|
struct vsir_block *header)
|
|
{
|
|
enum vkd3d_result ret;
|
|
size_t i;
|
|
|
|
if ((ret = vsir_block_list_add(loop, block)) < 0)
|
|
return ret;
|
|
|
|
if (ret == VKD3D_FALSE || block == header)
|
|
return VKD3D_OK;
|
|
|
|
for (i = 0; i < block->predecessors.count; ++i)
|
|
{
|
|
if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0)
|
|
return ret;
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg)
|
|
{
|
|
size_t i, j, k;
|
|
|
|
if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header))))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
memset(cfg->loops_by_header, 0xff, cfg->block_count * sizeof(*cfg->loops_by_header));
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
|
|
if (block->label == 0)
|
|
continue;
|
|
|
|
for (j = 0; j < block->successors.count; ++j)
|
|
{
|
|
struct vsir_block *header = block->successors.blocks[j];
|
|
struct vsir_block_list *loop;
|
|
enum vkd3d_result ret;
|
|
|
|
/* Is this a back edge? */
|
|
if (!vsir_block_dominates(header, block))
|
|
continue;
|
|
|
|
if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
loop = &cfg->loops[cfg->loops_count];
|
|
vsir_block_list_init(loop);
|
|
|
|
if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0)
|
|
return ret;
|
|
|
|
vsir_block_list_sort(loop);
|
|
|
|
if (TRACE_ON())
|
|
{
|
|
vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label);
|
|
|
|
for (k = 0; k < loop->count; ++k)
|
|
vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label);
|
|
|
|
TRACE("%s\n", cfg->debug_buffer.buffer);
|
|
vkd3d_string_buffer_clear(&cfg->debug_buffer);
|
|
}
|
|
|
|
if (cfg->loops_by_header[header->label - 1] != SIZE_MAX)
|
|
{
|
|
FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label);
|
|
vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED,
|
|
"Block %u is header to more than one loop, this is not implemented.", header->label);
|
|
return VKD3D_ERROR_NOT_IMPLEMENTED;
|
|
}
|
|
|
|
cfg->loops_by_header[header->label - 1] = cfg->loops_count;
|
|
|
|
++cfg->loops_count;
|
|
}
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
struct vsir_cfg_node_sorter
|
|
{
|
|
struct vsir_cfg *cfg;
|
|
struct vsir_cfg_node_sorter_stack_item
|
|
{
|
|
struct vsir_block_list *loop;
|
|
unsigned int seen_count;
|
|
unsigned int begin;
|
|
} *stack;
|
|
size_t stack_count, stack_capacity;
|
|
struct vsir_block_list available_blocks;
|
|
};
|
|
|
|
/* Topologically sort the blocks according to the forward edges. By
|
|
* definition if the input CFG is reducible then its forward edges
|
|
* form a DAG, so a topological sorting exists. In order to compute it
|
|
* we keep an array with the incoming degree for each block and an
|
|
* available list of all the blocks whose incoming degree has reached
|
|
* zero. At each step we pick a block from the available list and
|
|
* strip it away from the graph, updating the incoming degrees and
|
|
* available list.
|
|
*
|
|
* In principle at each step we can pick whatever node we want from
|
|
* the available list, and will get a topological sort
|
|
* anyway. However, we use these two criteria to give to the computed
|
|
* order additional properties:
|
|
*
|
|
* 1. we keep track of which loops we're into, and pick blocks
|
|
* belonging to the current innermost loop, so that loops are kept
|
|
* contiguous in the order; this can always be done when the input
|
|
* CFG is reducible;
|
|
*
|
|
* 2. subject to the requirement above, we always pick the most
|
|
* recently added block to the available list, because this tends
|
|
* to keep related blocks and require fewer control flow
|
|
* primitives.
|
|
*/
|
|
static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg)
|
|
{
|
|
struct vsir_cfg_node_sorter sorter = { .cfg = cfg };
|
|
unsigned int *in_degrees = NULL;
|
|
enum vkd3d_result ret;
|
|
size_t i;
|
|
|
|
if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees))))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
|
|
if (block->label == 0)
|
|
{
|
|
in_degrees[i] = UINT_MAX;
|
|
continue;
|
|
}
|
|
|
|
in_degrees[i] = block->predecessors.count;
|
|
|
|
/* Do not count back edges. */
|
|
if (cfg->loops_by_header[i] != SIZE_MAX)
|
|
{
|
|
assert(in_degrees[i] > 0);
|
|
in_degrees[i] -= 1;
|
|
}
|
|
|
|
if (in_degrees[i] == 0 && block != cfg->entry)
|
|
{
|
|
WARN("Unexpected entry point %u.\n", block->label);
|
|
vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
|
|
"Block %u is unreachable from the entry point.", block->label);
|
|
ret = VKD3D_ERROR_INVALID_SHADER;
|
|
goto fail;
|
|
}
|
|
}
|
|
|
|
if (in_degrees[cfg->entry->label - 1] != 0)
|
|
{
|
|
WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]);
|
|
vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
|
|
"The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]);
|
|
ret = VKD3D_ERROR_INVALID_SHADER;
|
|
goto fail;
|
|
}
|
|
|
|
vsir_block_list_init(&sorter.available_blocks);
|
|
|
|
if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, cfg->entry)) < 0)
|
|
goto fail;
|
|
|
|
while (sorter.available_blocks.count != 0)
|
|
{
|
|
struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL;
|
|
struct vsir_block *block;
|
|
size_t new_seen_count;
|
|
|
|
if (sorter.stack_count != 0)
|
|
inner_stack_item = &sorter.stack[sorter.stack_count - 1];
|
|
|
|
for (i = sorter.available_blocks.count - 1; ; --i)
|
|
{
|
|
if (i == SIZE_MAX)
|
|
{
|
|
ERR("Couldn't find any viable next block, is the input CFG reducible?\n");
|
|
ret = VKD3D_ERROR_INVALID_SHADER;
|
|
goto fail;
|
|
}
|
|
|
|
block = sorter.available_blocks.blocks[i];
|
|
|
|
if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block))
|
|
break;
|
|
}
|
|
|
|
/* If the node is a loop header, open the loop. */
|
|
if (sorter.cfg->loops_by_header[block->label - 1] != SIZE_MAX)
|
|
{
|
|
struct vsir_block_list *loop = &sorter.cfg->loops[sorter.cfg->loops_by_header[block->label - 1]];
|
|
|
|
if (loop)
|
|
{
|
|
if (!vkd3d_array_reserve((void **)&sorter.stack, &sorter.stack_capacity,
|
|
sorter.stack_count + 1, sizeof(*sorter.stack)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
inner_stack_item = &sorter.stack[sorter.stack_count++];
|
|
inner_stack_item->loop = loop;
|
|
inner_stack_item->seen_count = 0;
|
|
inner_stack_item->begin = sorter.cfg->order.count;
|
|
}
|
|
}
|
|
|
|
vsir_block_list_remove_index(&sorter.available_blocks, i);
|
|
block->order_pos = cfg->order.count;
|
|
if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0)
|
|
goto fail;
|
|
|
|
/* Close loops: since each loop is a strict subset of any
|
|
* outer loop, we just need to track how many blocks we've
|
|
* seen; when I close a loop I mark the same number of seen
|
|
* blocks for the next outer loop. */
|
|
new_seen_count = 1;
|
|
while (sorter.stack_count != 0)
|
|
{
|
|
inner_stack_item = &sorter.stack[sorter.stack_count - 1];
|
|
|
|
inner_stack_item->seen_count += new_seen_count;
|
|
|
|
assert(inner_stack_item->seen_count <= inner_stack_item->loop->count);
|
|
if (inner_stack_item->seen_count != inner_stack_item->loop->count)
|
|
break;
|
|
|
|
if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin,
|
|
cfg->order.count, false)) < 0)
|
|
goto fail;
|
|
|
|
new_seen_count = inner_stack_item->loop->count;
|
|
--sorter.stack_count;
|
|
}
|
|
|
|
/* Remove (forward) edges and make new nodes available. */
|
|
for (i = 0; i < block->successors.count; ++i)
|
|
{
|
|
struct vsir_block *successor = block->successors.blocks[i];
|
|
|
|
if (vsir_block_dominates(successor, block))
|
|
continue;
|
|
|
|
assert(in_degrees[successor->label - 1] > 0);
|
|
--in_degrees[successor->label - 1];
|
|
|
|
if (in_degrees[successor->label - 1] == 0)
|
|
{
|
|
if ((ret = vsir_block_list_add_checked(&sorter.available_blocks, successor)) < 0)
|
|
goto fail;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (cfg->order.count != cfg->block_count)
|
|
{
|
|
/* There is a cycle of forward edges. */
|
|
WARN("The control flow graph is not reducible.\n");
|
|
vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW,
|
|
"The control flow graph is not reducible.");
|
|
ret = VKD3D_ERROR_INVALID_SHADER;
|
|
goto fail;
|
|
}
|
|
|
|
assert(sorter.stack_count == 0);
|
|
|
|
vkd3d_free(in_degrees);
|
|
vkd3d_free(sorter.stack);
|
|
vsir_block_list_cleanup(&sorter.available_blocks);
|
|
|
|
if (TRACE_ON())
|
|
{
|
|
vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:");
|
|
|
|
for (i = 0; i < cfg->order.count; ++i)
|
|
vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label);
|
|
|
|
TRACE("%s\n", cfg->debug_buffer.buffer);
|
|
vkd3d_string_buffer_clear(&cfg->debug_buffer);
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
|
|
fail:
|
|
vkd3d_free(in_degrees);
|
|
vkd3d_free(sorter.stack);
|
|
vsir_block_list_cleanup(&sorter.available_blocks);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Sort loop intervals first by ascending begin time and then by
|
|
* descending end time, so that inner intervals appear after outer
|
|
* ones and disjoint intervals appear in their proper order. */
|
|
static int compare_loop_intervals(const void *ptr1, const void *ptr2)
|
|
{
|
|
const struct cfg_loop_interval *interval1 = ptr1;
|
|
const struct cfg_loop_interval *interval2 = ptr2;
|
|
|
|
if (interval1->begin != interval2->begin)
|
|
return vkd3d_u32_compare(interval1->begin, interval2->begin);
|
|
|
|
return -vkd3d_u32_compare(interval1->end, interval2->end);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg)
|
|
{
|
|
enum vkd3d_result ret;
|
|
size_t i, j, k;
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
|
|
if (block->label == 0)
|
|
continue;
|
|
|
|
for (j = 0; j < block->successors.count; ++j)
|
|
{
|
|
struct vsir_block *successor = block->successors.blocks[j];
|
|
struct cfg_loop_interval *extend = NULL;
|
|
unsigned int begin;
|
|
enum
|
|
{
|
|
ACTION_DO_NOTHING,
|
|
ACTION_CREATE_NEW,
|
|
ACTION_EXTEND,
|
|
} action = ACTION_CREATE_NEW;
|
|
|
|
/* We've already contructed loop intervals for the back
|
|
* edges, there's nothing more to do. */
|
|
if (vsir_block_dominates(successor, block))
|
|
continue;
|
|
|
|
assert(block->order_pos < successor->order_pos);
|
|
|
|
/* Jumping from a block to the following one is always
|
|
* possible, so nothing to do. */
|
|
if (block->order_pos + 1 == successor->order_pos)
|
|
continue;
|
|
|
|
/* Let's look for a loop interval that already breaks at
|
|
* `successor' and either contains or can be extended to
|
|
* contain `block'. */
|
|
for (k = 0; k < cfg->loop_interval_count; ++k)
|
|
{
|
|
struct cfg_loop_interval *interval = &cfg->loop_intervals[k];
|
|
|
|
if (interval->end != successor->order_pos)
|
|
continue;
|
|
|
|
if (interval->begin <= block->order_pos)
|
|
{
|
|
action = ACTION_DO_NOTHING;
|
|
break;
|
|
}
|
|
|
|
if (interval->synthetic)
|
|
{
|
|
action = ACTION_EXTEND;
|
|
extend = interval;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (action == ACTION_DO_NOTHING)
|
|
continue;
|
|
|
|
/* Ok, we have to decide where the new or replacing
|
|
* interval has to begin. These are the rules: 1. it must
|
|
* begin before `block'; 2. intervals must be properly
|
|
* nested; 3. the new interval should begin as late as
|
|
* possible, to limit control flow depth and extension. */
|
|
begin = block->order_pos;
|
|
|
|
/* Our candidate interval is always [begin,
|
|
* successor->order_pos), and we move `begin' backward
|
|
* until the candidate interval contains all the intervals
|
|
* whose endpoint lies in the candidate interval
|
|
* itself. */
|
|
for (k = 0; k < cfg->loop_interval_count; ++k)
|
|
{
|
|
struct cfg_loop_interval *interval = &cfg->loop_intervals[k];
|
|
|
|
if (begin < interval->end && interval->end < successor->order_pos)
|
|
begin = min(begin, interval->begin);
|
|
}
|
|
|
|
/* New we have to care about the intervals whose begin
|
|
* point lies in the candidate interval. We cannot move
|
|
* the candidate interval endpoint, because it is
|
|
* important that the loop break target matches
|
|
* `successor'. So we have to move that interval's begin
|
|
* point to the begin point of the candidate interval,
|
|
* i.e. `begin'. But what if the interval we should extend
|
|
* backward is not synthetic? This cannot happen,
|
|
* fortunately, because it would mean that there is a jump
|
|
* entering a loop via a block which is not the loop
|
|
* header, so the CFG would not be reducible. */
|
|
for (k = 0; k < cfg->loop_interval_count; ++k)
|
|
{
|
|
struct cfg_loop_interval *interval = &cfg->loop_intervals[k];
|
|
|
|
if (interval->begin < successor->order_pos && successor->order_pos < interval->end)
|
|
{
|
|
if (interval->synthetic)
|
|
interval->begin = min(begin, interval->begin);
|
|
assert(begin >= interval->begin);
|
|
}
|
|
}
|
|
|
|
if (action == ACTION_EXTEND)
|
|
extend->begin = begin;
|
|
else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0)
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals);
|
|
|
|
if (TRACE_ON())
|
|
for (i = 0; i < cfg->loop_interval_count; ++i)
|
|
TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ? "Synthetic" : "Natural",
|
|
cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end);
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
struct vsir_cfg_edge_action
|
|
{
|
|
enum vsir_cfg_jump_type jump_type;
|
|
unsigned int target;
|
|
struct vsir_block *successor;
|
|
};
|
|
|
|
static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block,
|
|
struct vsir_block *successor, struct vsir_cfg_edge_action *action)
|
|
{
|
|
unsigned int i;
|
|
|
|
action->target = UINT_MAX;
|
|
action->successor = successor;
|
|
|
|
if (successor->order_pos <= block->order_pos)
|
|
{
|
|
/* The successor is before the current block, so we have to
|
|
* use `continue'. The target loop is the innermost that
|
|
* contains the current block and has the successor as
|
|
* `continue' target. */
|
|
for (i = 0; i < cfg->loop_interval_count; ++i)
|
|
{
|
|
struct cfg_loop_interval *interval = &cfg->loop_intervals[i];
|
|
|
|
if (interval->begin == successor->order_pos && block->order_pos < interval->end)
|
|
action->target = i;
|
|
|
|
if (interval->begin > successor->order_pos)
|
|
break;
|
|
}
|
|
|
|
assert(action->target != UINT_MAX);
|
|
action->jump_type = JUMP_CONTINUE;
|
|
}
|
|
else
|
|
{
|
|
/* The successor is after the current block, so we have to use
|
|
* `break', or possibly just jump to the following block. The
|
|
* target loop is the outermost that contains the current
|
|
* block and has the successor as `break' target. */
|
|
for (i = 0; i < cfg->loop_interval_count; ++i)
|
|
{
|
|
struct cfg_loop_interval *interval = &cfg->loop_intervals[i];
|
|
|
|
if (interval->begin <= block->order_pos && interval->end == successor->order_pos)
|
|
{
|
|
action->target = i;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (action->target == UINT_MAX)
|
|
{
|
|
assert(successor->order_pos == block->order_pos + 1);
|
|
action->jump_type = JUMP_NONE;
|
|
}
|
|
else
|
|
{
|
|
action->jump_type = JUMP_BREAK;
|
|
}
|
|
}
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg)
|
|
{
|
|
unsigned int i, stack_depth = 1, open_interval_idx = 0;
|
|
struct vsir_cfg_structure_list **stack = NULL;
|
|
|
|
/* It's enough to allocate up to the maximum interval stacking
|
|
* depth (plus one for the full program), but this is simpler. */
|
|
if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack))))
|
|
goto fail;
|
|
cfg->structured_program.end = cfg->order.count;
|
|
stack[0] = &cfg->structured_program;
|
|
|
|
for (i = 0; i < cfg->order.count; ++i)
|
|
{
|
|
struct vsir_block *block = cfg->order.blocks[i];
|
|
struct vsir_cfg_structure *structure;
|
|
|
|
assert(stack_depth > 0);
|
|
|
|
/* Open loop intervals. */
|
|
while (open_interval_idx < cfg->loop_interval_count)
|
|
{
|
|
struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx];
|
|
|
|
if (interval->begin != i)
|
|
break;
|
|
|
|
if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP)))
|
|
goto fail;
|
|
structure->u.loop.idx = open_interval_idx++;
|
|
|
|
structure->u.loop.body.end = interval->end;
|
|
stack[stack_depth++] = &structure->u.loop.body;
|
|
}
|
|
|
|
/* Execute the block. */
|
|
if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK)))
|
|
goto fail;
|
|
structure->u.block = block;
|
|
|
|
/* Generate between zero and two jump instructions. */
|
|
switch (block->end->handler_idx)
|
|
{
|
|
case VKD3DSIH_BRANCH:
|
|
{
|
|
struct vsir_cfg_edge_action action_true, action_false;
|
|
bool invert_condition = false;
|
|
|
|
if (vsir_register_is_label(&block->end->src[0].reg))
|
|
{
|
|
unsigned int target = label_from_src_param(&block->end->src[0]);
|
|
struct vsir_block *successor = &cfg->blocks[target - 1];
|
|
|
|
vsir_cfg_compute_edge_action(cfg, block, successor, &action_true);
|
|
action_false = action_true;
|
|
}
|
|
else
|
|
{
|
|
unsigned int target = label_from_src_param(&block->end->src[1]);
|
|
struct vsir_block *successor = &cfg->blocks[target - 1];
|
|
|
|
vsir_cfg_compute_edge_action(cfg, block, successor, &action_true);
|
|
|
|
target = label_from_src_param(&block->end->src[2]);
|
|
successor = &cfg->blocks[target - 1];
|
|
|
|
vsir_cfg_compute_edge_action(cfg, block, successor, &action_false);
|
|
}
|
|
|
|
/* This will happen if the branch is unconditional,
|
|
* but also if it's conditional with the same target
|
|
* in both branches, which can happen in some corner
|
|
* cases, e.g. when converting switch instructions to
|
|
* selection ladders. */
|
|
if (action_true.successor == action_false.successor)
|
|
{
|
|
assert(action_true.jump_type == action_false.jump_type);
|
|
}
|
|
else
|
|
{
|
|
/* At most one branch can just fall through to the
|
|
* next block, in which case we make sure it's the
|
|
* false branch. */
|
|
if (action_true.jump_type == JUMP_NONE)
|
|
{
|
|
invert_condition = true;
|
|
}
|
|
else if (stack_depth >= 2)
|
|
{
|
|
struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2];
|
|
struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1];
|
|
|
|
assert(inner_loop->type == STRUCTURE_TYPE_LOOP);
|
|
|
|
/* Otherwise, if one of the branches is
|
|
* continueing the inner loop we're inside,
|
|
* make sure it's the false branch (because it
|
|
* will be optimized out later). */
|
|
if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx)
|
|
invert_condition = true;
|
|
}
|
|
|
|
if (invert_condition)
|
|
{
|
|
struct vsir_cfg_edge_action tmp = action_true;
|
|
action_true = action_false;
|
|
action_false = tmp;
|
|
}
|
|
|
|
assert(action_true.jump_type != JUMP_NONE);
|
|
|
|
if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP)))
|
|
goto fail;
|
|
structure->u.jump.type = action_true.jump_type;
|
|
structure->u.jump.target = action_true.target;
|
|
structure->u.jump.condition = &block->end->src[0];
|
|
structure->u.jump.invert_condition = invert_condition;
|
|
}
|
|
|
|
if (action_false.jump_type != JUMP_NONE)
|
|
{
|
|
if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP)))
|
|
goto fail;
|
|
structure->u.jump.type = action_false.jump_type;
|
|
structure->u.jump.target = action_false.target;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_RET:
|
|
if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP)))
|
|
goto fail;
|
|
structure->u.jump.type = JUMP_RET;
|
|
break;
|
|
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
|
|
/* Close loop intervals. */
|
|
while (stack_depth > 0)
|
|
{
|
|
if (stack[stack_depth - 1]->end != i + 1)
|
|
break;
|
|
|
|
--stack_depth;
|
|
}
|
|
}
|
|
|
|
assert(stack_depth == 0);
|
|
assert(open_interval_idx == cfg->loop_interval_count);
|
|
|
|
if (TRACE_ON())
|
|
vsir_cfg_dump_structured_program(cfg);
|
|
|
|
vkd3d_free(stack);
|
|
|
|
return VKD3D_OK;
|
|
|
|
fail:
|
|
vkd3d_free(stack);
|
|
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_list *list, unsigned int target)
|
|
{
|
|
struct vsir_cfg_structure *last = &list->structures[list->count - 1];
|
|
|
|
if (last->type == STRUCTURE_TYPE_JUMP && last->u.jump.type == JUMP_CONTINUE
|
|
&& !last->u.jump.condition && last->u.jump.target == target)
|
|
{
|
|
--list->count;
|
|
assert(cfg->loop_intervals[target].target_count > 0);
|
|
--cfg->loop_intervals[target].target_count;
|
|
}
|
|
}
|
|
|
|
static struct vsir_cfg_structure *vsir_cfg_get_trailing_break(struct vsir_cfg_structure_list *list)
|
|
{
|
|
struct vsir_cfg_structure *structure;
|
|
size_t count = list->count;
|
|
|
|
if (count == 0)
|
|
return NULL;
|
|
|
|
structure = &list->structures[count - 1];
|
|
|
|
if (structure->type != STRUCTURE_TYPE_JUMP || structure->u.jump.type != JUMP_BREAK
|
|
|| structure->u.jump.condition)
|
|
return NULL;
|
|
|
|
return structure;
|
|
}
|
|
|
|
/* When the last instruction in both branches of a selection construct
|
|
* is an unconditional break, any of them can be moved after the
|
|
* selection construct. If they break the same loop both of them can
|
|
* be moved out, otherwise we can choose which one: we choose the one
|
|
* that breaks the innermost loop, because we hope to eventually
|
|
* remove the loop itself.
|
|
*
|
|
* In principle a similar movement could be done when the last
|
|
* instructions are continue and continue, or continue and break. But
|
|
* in practice I don't think those situations can happen given the
|
|
* previous passes we do on the program, so we don't care. */
|
|
static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_list *list)
|
|
{
|
|
struct vsir_cfg_structure *selection, *if_break, *else_break, *new_break;
|
|
unsigned int if_target, else_target, max_target;
|
|
size_t pos = list->count - 1;
|
|
|
|
selection = &list->structures[pos];
|
|
assert(selection->type == STRUCTURE_TYPE_SELECTION);
|
|
|
|
if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body);
|
|
else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body);
|
|
|
|
if (!if_break || !else_break)
|
|
return VKD3D_OK;
|
|
|
|
if_target = if_break->u.jump.target;
|
|
else_target = else_break->u.jump.target;
|
|
max_target = max(if_target, else_target);
|
|
|
|
if (!(new_break = vsir_cfg_structure_list_append(list, STRUCTURE_TYPE_JUMP)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
new_break->u.jump.type = JUMP_BREAK;
|
|
new_break->u.jump.target = max_target;
|
|
++cfg->loop_intervals[max_target].target_count;
|
|
|
|
/* Pointer `selection' could have been invalidated by the append
|
|
* operation. */
|
|
selection = &list->structures[pos];
|
|
assert(selection->type == STRUCTURE_TYPE_SELECTION);
|
|
|
|
if (if_target == max_target)
|
|
{
|
|
--selection->u.selection.if_body.count;
|
|
assert(cfg->loop_intervals[if_target].target_count > 0);
|
|
--cfg->loop_intervals[if_target].target_count;
|
|
}
|
|
|
|
if (else_target == max_target)
|
|
{
|
|
--selection->u.selection.else_body.count;
|
|
assert(cfg->loop_intervals[else_target].target_count > 0);
|
|
--cfg->loop_intervals[else_target].target_count;
|
|
}
|
|
|
|
/* If a branch becomes empty, make it the else branch, so we save a block. */
|
|
if (selection->u.selection.if_body.count == 0)
|
|
{
|
|
struct vsir_cfg_structure_list tmp;
|
|
|
|
selection->u.selection.invert_condition = !selection->u.selection.invert_condition;
|
|
tmp = selection->u.selection.if_body;
|
|
selection->u.selection.if_body = selection->u.selection.else_body;
|
|
selection->u.selection.else_body = tmp;
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections_recursively(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_list *list)
|
|
{
|
|
struct vsir_cfg_structure *trailing;
|
|
|
|
if (list->count == 0)
|
|
return VKD3D_OK;
|
|
|
|
trailing = &list->structures[list->count - 1];
|
|
|
|
if (trailing->type != STRUCTURE_TYPE_SELECTION)
|
|
return VKD3D_OK;
|
|
|
|
vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.if_body);
|
|
vsir_cfg_move_breaks_out_of_selections_recursively(cfg, &trailing->u.selection.else_body);
|
|
|
|
return vsir_cfg_move_breaks_out_of_selections(cfg, list);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_synthesize_selections(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_list *list)
|
|
{
|
|
enum vkd3d_result ret;
|
|
size_t i;
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
{
|
|
struct vsir_cfg_structure *structure = &list->structures[i], new_selection, *new_jump;
|
|
|
|
if (structure->type != STRUCTURE_TYPE_JUMP || !structure->u.jump.condition)
|
|
continue;
|
|
|
|
vsir_cfg_structure_init(&new_selection, STRUCTURE_TYPE_SELECTION);
|
|
new_selection.u.selection.condition = structure->u.jump.condition;
|
|
new_selection.u.selection.invert_condition = structure->u.jump.invert_condition;
|
|
|
|
if (!(new_jump = vsir_cfg_structure_list_append(&new_selection.u.selection.if_body,
|
|
STRUCTURE_TYPE_JUMP)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
new_jump->u.jump.type = structure->u.jump.type;
|
|
new_jump->u.jump.target = structure->u.jump.target;
|
|
|
|
/* Move the rest of the structure list in the else branch
|
|
* rather than leaving it after the selection construct. The
|
|
* reason is that this is more conducive to further
|
|
* optimization, because all the conditional `break's appear
|
|
* as the last instruction of a branch of a cascade of
|
|
* selection constructs at the end of the structure list we're
|
|
* processing, instead of being buried in the middle of the
|
|
* structure list itself. */
|
|
if ((ret = vsir_cfg_structure_list_append_from_region(&new_selection.u.selection.else_body,
|
|
&list->structures[i + 1], list->count - i - 1)) < 0)
|
|
return ret;
|
|
|
|
*structure = new_selection;
|
|
list->count = i + 1;
|
|
|
|
if ((ret = vsir_cfg_synthesize_selections(cfg, &structure->u.selection.else_body)) < 0)
|
|
return ret;
|
|
|
|
if ((ret = vsir_cfg_move_breaks_out_of_selections(cfg, list)) < 0)
|
|
return ret;
|
|
|
|
break;
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_list *new_list, struct vsir_cfg_structure *loop)
|
|
{
|
|
struct vsir_cfg_structure_list *loop_body = &loop->u.loop.body;
|
|
unsigned int target, loop_idx = loop->u.loop.idx;
|
|
struct vsir_cfg_structure *trailing_break;
|
|
enum vkd3d_result ret;
|
|
|
|
trailing_break = vsir_cfg_get_trailing_break(loop_body);
|
|
|
|
/* If the loop's last instruction is not a break, we cannot remove
|
|
* the loop itself. */
|
|
if (!trailing_break)
|
|
{
|
|
if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
|
|
return ret;
|
|
memset(loop, 0, sizeof(*loop));
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
target = trailing_break->u.jump.target;
|
|
assert(cfg->loop_intervals[target].target_count > 0);
|
|
|
|
/* If the loop is not targeted by any jump, we can remove it. The
|
|
* trailing `break' then targets another loop, so we have to keep
|
|
* it. */
|
|
if (cfg->loop_intervals[loop_idx].target_count == 0)
|
|
{
|
|
if ((ret = vsir_cfg_structure_list_append_from_region(new_list,
|
|
&loop_body->structures[0], loop_body->count)) < 0)
|
|
return ret;
|
|
loop_body->count = 0;
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
/* If the loop is targeted only by its own trailing `break'
|
|
* instruction, then we can remove it together with the `break'
|
|
* itself. */
|
|
if (target == loop_idx && cfg->loop_intervals[loop_idx].target_count == 1)
|
|
{
|
|
--cfg->loop_intervals[loop_idx].target_count;
|
|
if ((ret = vsir_cfg_structure_list_append_from_region(new_list,
|
|
&loop_body->structures[0], loop_body->count - 1)) < 0)
|
|
return ret;
|
|
loop_body->count = 0;
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
|
|
return ret;
|
|
memset(loop, 0, sizeof(*loop));
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_optimize_recurse(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list)
|
|
{
|
|
struct vsir_cfg_structure_list old_list = *list, *new_list = list;
|
|
enum vkd3d_result ret;
|
|
size_t i;
|
|
|
|
memset(new_list, 0, sizeof(*new_list));
|
|
|
|
for (i = 0; i < old_list.count; ++i)
|
|
{
|
|
struct vsir_cfg_structure *loop = &old_list.structures[i], *selection;
|
|
struct vsir_cfg_structure_list *loop_body;
|
|
|
|
if (loop->type != STRUCTURE_TYPE_LOOP)
|
|
{
|
|
if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
|
|
goto out;
|
|
memset(loop, 0, sizeof(*loop));
|
|
continue;
|
|
}
|
|
|
|
loop_body = &loop->u.loop.body;
|
|
|
|
if (loop_body->count == 0)
|
|
{
|
|
if ((ret = vsir_cfg_structure_list_append_from_region(new_list, loop, 1)) < 0)
|
|
goto out;
|
|
memset(loop, 0, sizeof(*loop));
|
|
continue;
|
|
}
|
|
|
|
vsir_cfg_remove_trailing_continue(cfg, loop_body, loop->u.loop.idx);
|
|
|
|
if ((ret = vsir_cfg_optimize_recurse(cfg, loop_body)) < 0)
|
|
goto out;
|
|
|
|
if ((ret = vsir_cfg_synthesize_selections(cfg, loop_body)) < 0)
|
|
goto out;
|
|
|
|
if ((ret = vsir_cfg_append_loop(cfg, new_list, loop)) < 0)
|
|
goto out;
|
|
|
|
/* If the last pushed instruction is a selection and one of the branches terminates with a
|
|
* `break', start pushing to the other branch, in the hope of eventually push a `break'
|
|
* there too and be able to remove a loop. */
|
|
if (new_list->count == 0)
|
|
continue;
|
|
|
|
selection = &new_list->structures[new_list->count - 1];
|
|
|
|
if (selection->type == STRUCTURE_TYPE_SELECTION)
|
|
{
|
|
if (vsir_cfg_get_trailing_break(&selection->u.selection.if_body))
|
|
new_list = &selection->u.selection.else_body;
|
|
else if (vsir_cfg_get_trailing_break(&selection->u.selection.else_body))
|
|
new_list = &selection->u.selection.if_body;
|
|
}
|
|
}
|
|
|
|
ret = vsir_cfg_move_breaks_out_of_selections_recursively(cfg, list);
|
|
|
|
out:
|
|
vsir_cfg_structure_list_cleanup(&old_list);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void vsir_cfg_count_targets(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
{
|
|
struct vsir_cfg_structure *structure = &list->structures[i];
|
|
|
|
switch (structure->type)
|
|
{
|
|
case STRUCTURE_TYPE_BLOCK:
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_LOOP:
|
|
vsir_cfg_count_targets(cfg, &structure->u.loop.body);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_SELECTION:
|
|
vsir_cfg_count_targets(cfg, &structure->u.selection.if_body);
|
|
vsir_cfg_count_targets(cfg, &structure->u.selection.else_body);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_JUMP:
|
|
if (structure->u.jump.type == JUMP_BREAK || structure->u.jump.type == JUMP_CONTINUE)
|
|
++cfg->loop_intervals[structure->u.jump.target].target_count;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Trampolines are code gadgets used to emulate multilevel jumps (which are not natively supported
|
|
* by SPIR-V). A trampoline is inserted just after a loop and checks whether control has reached the
|
|
* intended site (i.e., we just jumped out of the target block) or if other levels of jumping are
|
|
* needed. For each jump a trampoline is required for all the loops between the jump itself and the
|
|
* target loop, excluding the target loop itself. */
|
|
static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list,
|
|
struct vsir_cfg_structure *loop)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
{
|
|
struct vsir_cfg_structure *structure = &list->structures[i];
|
|
|
|
switch (structure->type)
|
|
{
|
|
case STRUCTURE_TYPE_BLOCK:
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_LOOP:
|
|
structure->u.loop.outer_loop = loop;
|
|
vsir_cfg_mark_trampolines(cfg, &structure->u.loop.body, structure);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_SELECTION:
|
|
vsir_cfg_mark_trampolines(cfg, &structure->u.selection.if_body, loop);
|
|
vsir_cfg_mark_trampolines(cfg, &structure->u.selection.else_body, loop);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_JUMP:
|
|
{
|
|
struct vsir_cfg_structure *l;
|
|
if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE)
|
|
break;
|
|
for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop)
|
|
{
|
|
assert(l->type == STRUCTURE_TYPE_LOOP);
|
|
l->u.loop.needs_trampoline = true;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Launchers are the counterpart of trampolines. A launcher is inserted just before a jump, and
|
|
* writes in a well-known variable what is the target of the jump. Trampolines will then read that
|
|
* variable to decide how to redirect the jump to its intended target. A launcher is needed each
|
|
* time the innermost loop containing the jump itself has a trampoline (independently of whether the
|
|
* jump is targeting that loop or not). */
|
|
static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list,
|
|
struct vsir_cfg_structure *loop)
|
|
{
|
|
size_t i;
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
{
|
|
struct vsir_cfg_structure *structure = &list->structures[i];
|
|
|
|
switch (structure->type)
|
|
{
|
|
case STRUCTURE_TYPE_BLOCK:
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_LOOP:
|
|
vsir_cfg_mark_launchers(cfg, &structure->u.loop.body, structure);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_SELECTION:
|
|
vsir_cfg_mark_launchers(cfg, &structure->u.selection.if_body, loop);
|
|
vsir_cfg_mark_launchers(cfg, &structure->u.selection.else_body, loop);
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_JUMP:
|
|
if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE)
|
|
break;
|
|
assert(loop && loop->type == STRUCTURE_TYPE_LOOP);
|
|
if (loop->u.loop.needs_trampoline)
|
|
structure->u.jump.needs_launcher = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_optimize(struct vsir_cfg *cfg)
|
|
{
|
|
enum vkd3d_result ret;
|
|
|
|
vsir_cfg_count_targets(cfg, &cfg->structured_program);
|
|
|
|
ret = vsir_cfg_optimize_recurse(cfg, &cfg->structured_program);
|
|
|
|
/* Trampolines and launchers cannot be marked with the same pass,
|
|
* because a jump might have to be marked as launcher even when it
|
|
* targets its innermost loop, if other jumps in the same loop
|
|
* need a trampoline anyway. So launchers can be discovered only
|
|
* once all the trampolines are known. */
|
|
vsir_cfg_mark_trampolines(cfg, &cfg->structured_program, NULL);
|
|
vsir_cfg_mark_launchers(cfg, &cfg->structured_program, NULL);
|
|
|
|
if (TRACE_ON())
|
|
vsir_cfg_dump_structured_program(cfg);
|
|
|
|
return ret;
|
|
}
|
|
|
|
/* Forward declaration: the loop and selection emitters below call this
 * function for their nested lists before it is defined. */
static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg,
        struct vsir_cfg_structure_list *list, unsigned int loop_idx);
|
|
|
|
static enum vkd3d_result vsir_cfg_structure_list_emit_block(struct vsir_cfg *cfg,
|
|
struct vsir_block *block)
|
|
{
|
|
struct vsir_cfg_emit_target *target = cfg->target;
|
|
|
|
if (!reserve_instructions(&target->instructions, &target->ins_capacity,
|
|
target->ins_count + (block->end - block->begin)))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
memcpy(&target->instructions[target->ins_count], block->begin,
|
|
(char *)block->end - (char *)block->begin);
|
|
|
|
target->ins_count += block->end - block->begin;
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_structure_list_emit_loop(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_loop *loop, unsigned int loop_idx)
|
|
{
|
|
struct vsir_cfg_emit_target *target = cfg->target;
|
|
const struct vkd3d_shader_location no_loc = {0};
|
|
enum vkd3d_result ret;
|
|
|
|
if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_LOOP);
|
|
|
|
if ((ret = vsir_cfg_structure_list_emit(cfg, &loop->body, loop->idx)) < 0)
|
|
return ret;
|
|
|
|
if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 5))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDLOOP);
|
|
|
|
/* Add a trampoline to implement multilevel jumping depending on the stored
|
|
* jump_target value. */
|
|
if (loop->needs_trampoline)
|
|
{
|
|
/* If the multilevel jump is a `continue' and the target is the loop we're inside
|
|
* right now, then we can finally do the `continue'. */
|
|
const unsigned int outer_continue_target = loop_idx << 1 | 1;
|
|
/* If the multilevel jump is a `continue' to any other target, or if it is a `break'
|
|
* and the target is not the loop we just finished emitting, then it means that
|
|
* we have to reach an outer loop, so we keep breaking. */
|
|
const unsigned int inner_break_target = loop->idx << 1;
|
|
|
|
if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
|
|
&no_loc, VKD3DSIH_IEQ, 1, 2))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count);
|
|
src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx);
|
|
src_param_init_const_uint(&target->instructions[target->ins_count].src[1], outer_continue_target);
|
|
|
|
++target->ins_count;
|
|
|
|
if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
|
|
&no_loc, VKD3DSIH_CONTINUEP, 0, 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count);
|
|
|
|
++target->ins_count;
|
|
++target->temp_count;
|
|
|
|
if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
|
|
&no_loc, VKD3DSIH_IEQ, 1, 2))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
dst_param_init_temp_bool(&target->instructions[target->ins_count].dst[0], target->temp_count);
|
|
src_param_init_temp_uint(&target->instructions[target->ins_count].src[0], target->jump_target_temp_idx);
|
|
src_param_init_const_uint(&target->instructions[target->ins_count].src[1], inner_break_target);
|
|
|
|
++target->ins_count;
|
|
|
|
if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
|
|
&no_loc, VKD3DSIH_BREAKP, 0, 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
|
|
|
|
src_param_init_temp_bool(&target->instructions[target->ins_count].src[0], target->temp_count);
|
|
|
|
++target->ins_count;
|
|
++target->temp_count;
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_structure_list_emit_selection(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_selection *selection, unsigned int loop_idx)
|
|
{
|
|
struct vsir_cfg_emit_target *target = cfg->target;
|
|
const struct vkd3d_shader_location no_loc = {0};
|
|
enum vkd3d_result ret;
|
|
|
|
if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
|
|
&no_loc, VKD3DSIH_IF, 0, 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
target->instructions[target->ins_count].src[0] = *selection->condition;
|
|
|
|
if (selection->invert_condition)
|
|
target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
|
|
|
|
++target->ins_count;
|
|
|
|
if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->if_body, loop_idx)) < 0)
|
|
return ret;
|
|
|
|
if (selection->else_body.count != 0)
|
|
{
|
|
if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ELSE);
|
|
|
|
if ((ret = vsir_cfg_structure_list_emit(cfg, &selection->else_body, loop_idx)) < 0)
|
|
return ret;
|
|
}
|
|
|
|
if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
vsir_instruction_init(&target->instructions[target->ins_count++], &no_loc, VKD3DSIH_ENDIF);
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_jump *jump, unsigned int loop_idx)
|
|
{
|
|
struct vsir_cfg_emit_target *target = cfg->target;
|
|
const struct vkd3d_shader_location no_loc = {0};
|
|
/* Encode the jump target as the loop index plus a bit to remember whether
|
|
* we're breaking or continueing. */
|
|
unsigned int jump_target = jump->target << 1;
|
|
enum vkd3d_shader_opcode opcode;
|
|
|
|
switch (jump->type)
|
|
{
|
|
case JUMP_CONTINUE:
|
|
/* If we're continueing the loop we're directly inside, then we can emit a
|
|
* `continue'. Otherwise we first have to break all the loops between here
|
|
* and the loop to continue, recording our intention to continue
|
|
* in the lowest bit of jump_target. */
|
|
if (jump->target == loop_idx)
|
|
{
|
|
opcode = jump->condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE;
|
|
break;
|
|
}
|
|
jump_target |= 1;
|
|
/* fall through */
|
|
|
|
case JUMP_BREAK:
|
|
opcode = jump->condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK;
|
|
break;
|
|
|
|
case JUMP_RET:
|
|
assert(!jump->condition);
|
|
opcode = VKD3DSIH_RET;
|
|
break;
|
|
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
|
|
if (!reserve_instructions(&target->instructions, &target->ins_capacity, target->ins_count + 2))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
if (jump->needs_launcher)
|
|
{
|
|
if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
|
|
&no_loc, VKD3DSIH_MOV, 1, 1))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
dst_param_init_temp_uint(&target->instructions[target->ins_count].dst[0], target->jump_target_temp_idx);
|
|
src_param_init_const_uint(&target->instructions[target->ins_count].src[0], jump_target);
|
|
|
|
++target->ins_count;
|
|
}
|
|
|
|
if (!vsir_instruction_init_with_params(cfg->program, &target->instructions[target->ins_count],
|
|
&no_loc, opcode, 0, !!jump->condition))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
if (jump->invert_condition)
|
|
target->instructions[target->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z;
|
|
|
|
if (jump->condition)
|
|
target->instructions[target->ins_count].src[0] = *jump->condition;
|
|
|
|
++target->ins_count;
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg,
|
|
struct vsir_cfg_structure_list *list, unsigned int loop_idx)
|
|
{
|
|
enum vkd3d_result ret;
|
|
size_t i;
|
|
|
|
for (i = 0; i < list->count; ++i)
|
|
{
|
|
struct vsir_cfg_structure *structure = &list->structures[i];
|
|
|
|
switch (structure->type)
|
|
{
|
|
case STRUCTURE_TYPE_BLOCK:
|
|
if ((ret = vsir_cfg_structure_list_emit_block(cfg, structure->u.block)) < 0)
|
|
return ret;
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_LOOP:
|
|
if ((ret = vsir_cfg_structure_list_emit_loop(cfg, &structure->u.loop, loop_idx)) < 0)
|
|
return ret;
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_SELECTION:
|
|
if ((ret = vsir_cfg_structure_list_emit_selection(cfg, &structure->u.selection,
|
|
loop_idx)) < 0)
|
|
return ret;
|
|
break;
|
|
|
|
case STRUCTURE_TYPE_JUMP:
|
|
if ((ret = vsir_cfg_structure_list_emit_jump(cfg, &structure->u.jump,
|
|
loop_idx)) < 0)
|
|
return ret;
|
|
break;
|
|
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
}
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg)
|
|
{
|
|
return vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX);
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_structurize_function(struct vsir_program *program,
|
|
struct vkd3d_shader_message_context *message_context, struct vsir_cfg_emit_target *target)
|
|
{
|
|
enum vkd3d_result ret;
|
|
struct vsir_cfg cfg;
|
|
|
|
if ((ret = vsir_cfg_init(&cfg, program, message_context, target)) < 0)
|
|
return ret;
|
|
|
|
vsir_cfg_compute_dominators(&cfg);
|
|
|
|
if ((ret = vsir_cfg_compute_loops(&cfg)) < 0)
|
|
goto out;
|
|
|
|
if ((ret = vsir_cfg_sort_nodes(&cfg)) < 0)
|
|
goto out;
|
|
|
|
if ((ret = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0)
|
|
goto out;
|
|
|
|
if ((ret = vsir_cfg_build_structured_program(&cfg)) < 0)
|
|
goto out;
|
|
|
|
if ((ret = vsir_cfg_optimize(&cfg)) < 0)
|
|
goto out;
|
|
|
|
ret = vsir_cfg_emit_structured_program(&cfg);
|
|
|
|
out:
|
|
vsir_cfg_cleanup(&cfg);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_structurize(struct vsir_program *program,
|
|
struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
struct vsir_cfg_emit_target target = {0};
|
|
enum vkd3d_result ret;
|
|
unsigned int i;
|
|
|
|
target.jump_target_temp_idx = program->temp_count;
|
|
target.temp_count = program->temp_count + 1;
|
|
|
|
if (!reserve_instructions(&target.instructions, &target.ins_capacity, program->instructions.count))
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
|
|
/* Copy declarations until the first block. */
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
|
|
|
if (ins->handler_idx == VKD3DSIH_LABEL)
|
|
break;
|
|
|
|
target.instructions[target.ins_count++] = *ins;
|
|
}
|
|
|
|
if ((ret = vsir_program_structurize_function(program, message_context, &target)) < 0)
|
|
goto fail;
|
|
|
|
vkd3d_free(program->instructions.elements);
|
|
program->instructions.elements = target.instructions;
|
|
program->instructions.capacity = target.ins_capacity;
|
|
program->instructions.count = target.ins_count;
|
|
program->temp_count = target.temp_count;
|
|
|
|
return VKD3D_OK;
|
|
|
|
fail:
|
|
vkd3d_free(target.instructions);
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void register_map_undominated_use(struct vkd3d_shader_register *reg, struct ssas_to_temps_alloc *alloc,
|
|
struct vsir_block *block, struct vsir_block **origin_blocks)
|
|
{
|
|
unsigned int i;
|
|
|
|
if (!register_is_ssa(reg))
|
|
return;
|
|
|
|
i = reg->idx[0].offset;
|
|
if (alloc->table[i] == UINT_MAX && !vsir_block_dominates(origin_blocks[i], block))
|
|
alloc->table[i] = alloc->next_temp_idx++;
|
|
|
|
for (i = 0; i < reg->idx_count; ++i)
|
|
if (reg->idx[i].rel_addr)
|
|
register_map_undominated_use(®->idx[i].rel_addr->reg, alloc, block, origin_blocks);
|
|
}
|
|
|
|
/* Drivers are not necessarily optimised to handle very large numbers of temps. For example,
|
|
* using them only where necessary fixes stuttering issues in Horizon Zero Dawn on RADV.
|
|
* This can also result in the backend emitting less code because temps typically need an
|
|
* access chain and a load/store. Conversion of phi SSA values to temps should eliminate all
|
|
* undominated SSA use, but structurisation may create new occurrences. */
|
|
static enum vkd3d_result vsir_cfg_materialize_undominated_ssas_to_temps(struct vsir_cfg *cfg)
|
|
{
|
|
struct vsir_program *program = cfg->program;
|
|
struct ssas_to_temps_alloc alloc = {0};
|
|
struct vsir_block **origin_blocks;
|
|
unsigned int j;
|
|
size_t i;
|
|
|
|
if (!(origin_blocks = vkd3d_calloc(program->ssa_count, sizeof(*origin_blocks))))
|
|
{
|
|
ERR("Failed to allocate origin block array.\n");
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
if (!ssas_to_temps_alloc_init(&alloc, program->ssa_count, program->temp_count))
|
|
{
|
|
vkd3d_free(origin_blocks);
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
struct vkd3d_shader_instruction *ins;
|
|
|
|
for (ins = block->begin; ins <= block->end; ++ins)
|
|
{
|
|
for (j = 0; j < ins->dst_count; ++j)
|
|
{
|
|
if (register_is_ssa(&ins->dst[j].reg))
|
|
origin_blocks[ins->dst[j].reg.idx[0].offset] = block;
|
|
}
|
|
}
|
|
}
|
|
|
|
for (i = 0; i < cfg->block_count; ++i)
|
|
{
|
|
struct vsir_block *block = &cfg->blocks[i];
|
|
struct vkd3d_shader_instruction *ins;
|
|
|
|
for (ins = block->begin; ins <= block->end; ++ins)
|
|
{
|
|
for (j = 0; j < ins->src_count; ++j)
|
|
register_map_undominated_use(&ins->src[j].reg, &alloc, block, origin_blocks);
|
|
}
|
|
}
|
|
|
|
if (alloc.next_temp_idx == program->temp_count)
|
|
goto done;
|
|
|
|
TRACE("Emitting temps for %u values with undominated usage.\n", alloc.next_temp_idx - program->temp_count);
|
|
|
|
for (i = 0; i < program->instructions.count; ++i)
|
|
{
|
|
struct vkd3d_shader_instruction *ins = &program->instructions.elements[i];
|
|
|
|
for (j = 0; j < ins->dst_count; ++j)
|
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->dst[j].reg);
|
|
|
|
for (j = 0; j < ins->src_count; ++j)
|
|
materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg);
|
|
}
|
|
|
|
program->temp_count = alloc.next_temp_idx;
|
|
done:
|
|
vkd3d_free(origin_blocks);
|
|
vkd3d_free(alloc.table);
|
|
|
|
return VKD3D_OK;
|
|
}
|
|
|
|
static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(struct vsir_program *program,
|
|
struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
enum vkd3d_result ret;
|
|
struct vsir_cfg cfg;
|
|
|
|
if ((ret = vsir_cfg_init(&cfg, program, message_context, NULL)) < 0)
|
|
return ret;
|
|
|
|
vsir_cfg_compute_dominators(&cfg);
|
|
|
|
ret = vsir_cfg_materialize_undominated_ssas_to_temps(&cfg);
|
|
|
|
vsir_cfg_cleanup(&cfg);
|
|
|
|
return ret;
|
|
}
|
|
|
|
struct validation_context
|
|
{
|
|
struct vkd3d_shader_message_context *message_context;
|
|
const struct vsir_program *program;
|
|
size_t instruction_idx;
|
|
struct vkd3d_shader_location null_location;
|
|
bool invalid_instruction_idx;
|
|
enum vkd3d_result status;
|
|
bool dcl_temps_found;
|
|
enum vkd3d_shader_opcode phase;
|
|
enum cf_type
|
|
{
|
|
CF_TYPE_UNKNOWN = 0,
|
|
CF_TYPE_STRUCTURED,
|
|
CF_TYPE_BLOCKS,
|
|
} cf_type;
|
|
bool inside_block;
|
|
|
|
struct validation_context_temp_data
|
|
{
|
|
enum vsir_dimension dimension;
|
|
size_t first_seen;
|
|
} *temps;
|
|
|
|
struct validation_context_ssa_data
|
|
{
|
|
enum vsir_dimension dimension;
|
|
enum vkd3d_data_type data_type;
|
|
size_t first_seen;
|
|
uint32_t write_mask;
|
|
uint32_t read_mask;
|
|
size_t first_assigned;
|
|
} *ssas;
|
|
|
|
enum vkd3d_shader_opcode *blocks;
|
|
size_t depth;
|
|
size_t blocks_capacity;
|
|
};
|
|
|
|
/* Report a validation error, both through the message context and ERR().
 *
 * The printf-style message is formatted into a temporary string buffer. When
 * the context has a valid instruction index, the error is attached to that
 * instruction's location and prefixed with its 1-based number; otherwise the
 * context's null location is used and no prefix is added. The first error
 * reported sets ctx->status to VKD3D_ERROR_INVALID_SHADER. */
static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *ctx,
        enum vkd3d_shader_error error, const char *format, ...)
{
    struct vkd3d_string_buffer message;
    va_list args;

    vkd3d_string_buffer_init(&message);

    va_start(args, format);
    vkd3d_string_buffer_vprintf(&message, format, args);
    va_end(args);

    if (!ctx->invalid_instruction_idx)
    {
        const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx];
        const size_t ins_number = ctx->instruction_idx + 1;

        vkd3d_shader_error(ctx->message_context, &ins->location, error,
                "instruction %zu: %s", ins_number, message.buffer);
        ERR("VSIR validation error: instruction %zu: %s\n", ins_number, message.buffer);
    }
    else
    {
        vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", message.buffer);
        ERR("VSIR validation error: %s\n", message.buffer);
    }

    vkd3d_string_buffer_cleanup(&message);

    /* Keep the status of the first error only. */
    if (!ctx->status)
        ctx->status = VKD3D_ERROR_INVALID_SHADER;
}
|
|
|
|
static void vsir_validate_src_param(struct validation_context *ctx,
|
|
const struct vkd3d_shader_src_param *src);
|
|
|
|
static void vsir_validate_register(struct validation_context *ctx,
|
|
const struct vkd3d_shader_register *reg)
|
|
{
|
|
unsigned int i;
|
|
|
|
if (reg->type >= VKD3DSPR_COUNT)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE, "Invalid register type %#x.",
|
|
reg->type);
|
|
|
|
if (reg->precision >= VKD3D_SHADER_REGISTER_PRECISION_COUNT)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid register precision %#x.",
|
|
reg->precision);
|
|
|
|
if (reg->data_type >= VKD3D_DATA_COUNT)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid register data type %#x.",
|
|
reg->data_type);
|
|
|
|
if (reg->dimension >= VSIR_DIMENSION_COUNT)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid register dimension %#x.",
|
|
reg->dimension);
|
|
|
|
if (reg->idx_count > ARRAY_SIZE(reg->idx))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid register index count %u.",
|
|
reg->idx_count);
|
|
|
|
for (i = 0; i < min(reg->idx_count, ARRAY_SIZE(reg->idx)); ++i)
|
|
{
|
|
const struct vkd3d_shader_src_param *param = reg->idx[i].rel_addr;
|
|
if (reg->idx[i].rel_addr)
|
|
vsir_validate_src_param(ctx, param);
|
|
}
|
|
|
|
switch (reg->type)
|
|
{
|
|
case VKD3DSPR_TEMP:
|
|
{
|
|
struct validation_context_temp_data *data;
|
|
|
|
if (reg->idx_count != 1)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a TEMP register.",
|
|
reg->idx_count);
|
|
break;
|
|
}
|
|
|
|
if (reg->idx[0].rel_addr)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register.");
|
|
|
|
if (reg->idx[0].offset >= ctx->program->temp_count)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.",
|
|
reg->idx[0].offset, ctx->program->temp_count);
|
|
break;
|
|
}
|
|
|
|
data = &ctx->temps[reg->idx[0].offset];
|
|
|
|
if (reg->dimension == VSIR_DIMENSION_NONE)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a TEMP register.");
|
|
break;
|
|
}
|
|
|
|
/* TEMP registers can be scalar or vec4, provided that
|
|
* each individual register always appears with the same
|
|
* dimension. */
|
|
if (data->dimension == VSIR_DIMENSION_NONE)
|
|
{
|
|
data->dimension = reg->dimension;
|
|
data->first_seen = ctx->instruction_idx;
|
|
}
|
|
else if (data->dimension != reg->dimension)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a TEMP register: "
|
|
"it has already been seen with dimension %#x at instruction %zu.",
|
|
reg->dimension, data->dimension, data->first_seen);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case VKD3DSPR_SSA:
|
|
{
|
|
struct validation_context_ssa_data *data;
|
|
|
|
if (reg->idx_count != 1)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a SSA register.",
|
|
reg->idx_count);
|
|
break;
|
|
}
|
|
|
|
if (reg->idx[0].rel_addr)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a SSA register.");
|
|
|
|
if (reg->idx[0].offset >= ctx->program->ssa_count)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
|
|
"SSA register index %u exceeds the maximum count %u.",
|
|
reg->idx[0].offset, ctx->program->ssa_count);
|
|
break;
|
|
}
|
|
|
|
data = &ctx->ssas[reg->idx[0].offset];
|
|
|
|
if (reg->dimension == VSIR_DIMENSION_NONE)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension NONE for a SSA register.");
|
|
break;
|
|
}
|
|
|
|
/* SSA registers can be scalar or vec4, provided that each
|
|
* individual register always appears with the same
|
|
* dimension. */
|
|
if (data->dimension == VSIR_DIMENSION_NONE)
|
|
{
|
|
data->dimension = reg->dimension;
|
|
data->data_type = reg->data_type;
|
|
data->first_seen = ctx->instruction_idx;
|
|
}
|
|
else
|
|
{
|
|
if (data->dimension != reg->dimension)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a SSA register: "
|
|
"it has already been seen with dimension %#x at instruction %zu.",
|
|
reg->dimension, data->dimension, data->first_seen);
|
|
|
|
if (data_type_is_64_bit(data->data_type) != data_type_is_64_bit(reg->data_type))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a SSA register: "
|
|
"it has already been seen with data type %#x at instruction %zu.",
|
|
reg->data_type, data->data_type, data->first_seen);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case VKD3DSPR_LABEL:
|
|
if (reg->precision != VKD3D_SHADER_REGISTER_PRECISION_DEFAULT)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.",
|
|
reg->precision);
|
|
|
|
if (reg->data_type != VKD3D_DATA_UNUSED)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.",
|
|
reg->data_type);
|
|
|
|
if (reg->dimension != VSIR_DIMENSION_NONE)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION, "Invalid dimension %#x for a LABEL register.",
|
|
reg->dimension);
|
|
|
|
if (reg->idx_count != 1)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a LABEL register.",
|
|
reg->idx_count);
|
|
break;
|
|
}
|
|
|
|
if (reg->idx[0].rel_addr)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a LABEL register.");
|
|
|
|
/* Index == 0 is invalid, but it is temporarily allowed
|
|
* for intermediate stages. Once we support validation
|
|
* dialects we can selectively check for that. */
|
|
if (reg->idx[0].offset > ctx->program->block_count)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX,
|
|
"LABEL register index %u exceeds the maximum count %u.",
|
|
reg->idx[0].offset, ctx->program->block_count);
|
|
break;
|
|
|
|
case VKD3DSPR_NULL:
|
|
if (reg->idx_count != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a NULL register.",
|
|
reg->idx_count);
|
|
break;
|
|
|
|
case VKD3DSPR_IMMCONST:
|
|
if (reg->idx_count != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST register.",
|
|
reg->idx_count);
|
|
break;
|
|
|
|
case VKD3DSPR_IMMCONST64:
|
|
if (reg->idx_count != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX_COUNT, "Invalid index count %u for a IMMCONST64 register.",
|
|
reg->idx_count);
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void vsir_validate_dst_param(struct validation_context *ctx,
|
|
const struct vkd3d_shader_dst_param *dst)
|
|
{
|
|
vsir_validate_register(ctx, &dst->reg);
|
|
|
|
if (dst->write_mask & ~VKD3DSP_WRITEMASK_ALL)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination has invalid write mask %#x.",
|
|
dst->write_mask);
|
|
|
|
switch (dst->reg.dimension)
|
|
{
|
|
case VSIR_DIMENSION_SCALAR:
|
|
if (dst->write_mask != VKD3DSP_WRITEMASK_0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Scalar destination has invalid write mask %#x.",
|
|
dst->write_mask);
|
|
break;
|
|
|
|
case VSIR_DIMENSION_VEC4:
|
|
if (dst->write_mask == 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Vec4 destination has empty write mask.");
|
|
break;
|
|
|
|
default:
|
|
if (dst->write_mask != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_WRITE_MASK, "Destination of dimension %u has invalid write mask %#x.",
|
|
dst->reg.dimension, dst->write_mask);
|
|
break;
|
|
}
|
|
|
|
if (dst->modifiers & ~VKD3DSPDM_MASK)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Destination has invalid modifiers %#x.",
|
|
dst->modifiers);
|
|
|
|
switch (dst->shift)
|
|
{
|
|
case 0:
|
|
case 1:
|
|
case 2:
|
|
case 3:
|
|
case 13:
|
|
case 14:
|
|
case 15:
|
|
break;
|
|
|
|
default:
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT, "Destination has invalid shift %#x.",
|
|
dst->shift);
|
|
}
|
|
|
|
switch (dst->reg.type)
|
|
{
|
|
case VKD3DSPR_SSA:
|
|
if (dst->reg.idx[0].offset < ctx->program->ssa_count)
|
|
{
|
|
struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset];
|
|
|
|
if (data->write_mask == 0)
|
|
{
|
|
data->write_mask = dst->write_mask;
|
|
data->first_assigned = ctx->instruction_idx;
|
|
}
|
|
else
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE,
|
|
"SSA register is already assigned at instruction %zu.",
|
|
data->first_assigned);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case VKD3DSPR_IMMCONST:
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid IMMCONST register used as destination parameter.");
|
|
break;
|
|
|
|
case VKD3DSPR_IMMCONST64:
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid IMMCONST64 register used as destination parameter.");
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void vsir_validate_src_param(struct validation_context *ctx,
|
|
const struct vkd3d_shader_src_param *src)
|
|
{
|
|
vsir_validate_register(ctx, &src->reg);
|
|
|
|
if (src->swizzle & ~0x03030303u)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source has invalid swizzle %#x.",
|
|
src->swizzle);
|
|
|
|
if (src->reg.dimension != VSIR_DIMENSION_VEC4 && src->swizzle != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "Source of dimension %u has invalid swizzle %#x.",
|
|
src->reg.dimension, src->swizzle);
|
|
|
|
if (src->modifiers >= VKD3DSPSM_COUNT)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS, "Source has invalid modifiers %#x.",
|
|
src->modifiers);
|
|
|
|
switch (src->reg.type)
|
|
{
|
|
case VKD3DSPR_SSA:
|
|
if (src->reg.idx[0].offset < ctx->program->ssa_count)
|
|
{
|
|
struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset];
|
|
unsigned int i;
|
|
|
|
for (i = 0; i < VKD3D_VEC4_SIZE; ++i)
|
|
data->read_mask |= (1u << vsir_swizzle_get_component(src->swizzle, i));
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
static void vsir_validate_dst_count(struct validation_context *ctx,
|
|
const struct vkd3d_shader_instruction *instruction, unsigned int count)
|
|
{
|
|
if (instruction->dst_count != count)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT,
|
|
"Invalid destination count %u for an instruction of type %#x, expected %u.",
|
|
instruction->dst_count, instruction->handler_idx, count);
|
|
}
|
|
|
|
static void vsir_validate_src_count(struct validation_context *ctx,
|
|
const struct vkd3d_shader_instruction *instruction, unsigned int count)
|
|
{
|
|
if (instruction->src_count != count)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
|
|
"Invalid source count %u for an instruction of type %#x, expected %u.",
|
|
instruction->src_count, instruction->handler_idx, count);
|
|
}
|
|
|
|
static bool vsir_validate_src_min_count(struct validation_context *ctx,
|
|
const struct vkd3d_shader_instruction *instruction, unsigned int count)
|
|
{
|
|
if (instruction->src_count < count)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
|
|
"Invalid source count %u for an instruction of type %#x, expected at least %u.",
|
|
instruction->src_count, instruction->handler_idx, count);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool vsir_validate_src_max_count(struct validation_context *ctx,
|
|
const struct vkd3d_shader_instruction *instruction, unsigned int count)
|
|
{
|
|
if (instruction->src_count > count)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
|
|
"Invalid source count %u for an instruction of type %#x, expected at most %u.",
|
|
instruction->src_count, instruction->handler_idx, count);
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static const char *name_from_cf_type(enum cf_type type)
|
|
{
|
|
switch (type)
|
|
{
|
|
case CF_TYPE_STRUCTURED:
|
|
return "structured";
|
|
case CF_TYPE_BLOCKS:
|
|
return "block-based";
|
|
default:
|
|
vkd3d_unreachable();
|
|
}
|
|
}
|
|
|
|
static void vsir_validate_cf_type(struct validation_context *ctx,
|
|
const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type)
|
|
{
|
|
assert(ctx->cf_type != CF_TYPE_UNKNOWN);
|
|
assert(expected_type != CF_TYPE_UNKNOWN);
|
|
if (ctx->cf_type != expected_type)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.",
|
|
instruction->handler_idx, name_from_cf_type(ctx->cf_type));
|
|
}
|
|
|
|
static void vsir_validate_instruction(struct validation_context *ctx)
|
|
{
|
|
const struct vkd3d_shader_version *version = &ctx->program->shader_version;
|
|
const struct vkd3d_shader_instruction *instruction;
|
|
size_t i;
|
|
|
|
instruction = &ctx->program->instructions.elements[ctx->instruction_idx];
|
|
|
|
for (i = 0; i < instruction->dst_count; ++i)
|
|
vsir_validate_dst_param(ctx, &instruction->dst[i]);
|
|
|
|
for (i = 0; i < instruction->src_count; ++i)
|
|
vsir_validate_src_param(ctx, &instruction->src[i]);
|
|
|
|
if (instruction->handler_idx >= VKD3DSIH_INVALID)
|
|
{
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.",
|
|
instruction->handler_idx);
|
|
}
|
|
|
|
switch (instruction->handler_idx)
|
|
{
|
|
case VKD3DSIH_HS_DECLS:
|
|
case VKD3DSIH_HS_CONTROL_POINT_PHASE:
|
|
case VKD3DSIH_HS_FORK_PHASE:
|
|
case VKD3DSIH_HS_JOIN_PHASE:
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
if (version->type != VKD3D_SHADER_TYPE_HULL)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.",
|
|
instruction->handler_idx);
|
|
if (ctx->depth != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.",
|
|
instruction->handler_idx);
|
|
ctx->phase = instruction->handler_idx;
|
|
ctx->dcl_temps_found = false;
|
|
return;
|
|
|
|
case VKD3DSIH_DCL_HS_MAX_TESSFACTOR:
|
|
/* Exclude non-finite values. */
|
|
if (!(instruction->declaration.max_tessellation_factor >= 1.0f
|
|
&& instruction->declaration.max_tessellation_factor <= 64.0f))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Max tessellation factor %f is invalid.",
|
|
instruction->declaration.max_tessellation_factor);
|
|
return;
|
|
|
|
/* The DXIL parser can generate these outside phases, but this is not an issue. */
|
|
case VKD3DSIH_DCL_INPUT:
|
|
case VKD3DSIH_DCL_OUTPUT:
|
|
return;
|
|
|
|
case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT:
|
|
if (!instruction->declaration.count || instruction->declaration.count > 32)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION, "Output control point count %u is invalid.",
|
|
instruction->declaration.count);
|
|
return;
|
|
|
|
case VKD3DSIH_DCL_TESSELLATOR_DOMAIN:
|
|
if (instruction->declaration.tessellator_domain == VKD3D_TESSELLATOR_DOMAIN_INVALID
|
|
|| instruction->declaration.tessellator_domain >= VKD3D_TESSELLATOR_DOMAIN_COUNT)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
|
|
"Tessellator domain %#x is invalid.", instruction->declaration.tessellator_domain);
|
|
return;
|
|
|
|
case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE:
|
|
if (!instruction->declaration.tessellator_output_primitive
|
|
|| instruction->declaration.tessellator_output_primitive > VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
|
|
"Tessellator output primitive %#x is invalid.", instruction->declaration.tessellator_output_primitive);
|
|
return;
|
|
|
|
case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING:
|
|
if (!instruction->declaration.tessellator_partitioning
|
|
|| instruction->declaration.tessellator_partitioning > VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_TESSELLATION,
|
|
"Tessellator partitioning %#x is invalid.", instruction->declaration.tessellator_partitioning);
|
|
return;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (version->type == VKD3D_SHADER_TYPE_HULL && ctx->phase == VKD3DSIH_INVALID)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER,
|
|
"Instruction %#x appear before any phase instruction in a hull shader.",
|
|
instruction->handler_idx);
|
|
|
|
/* We support two different control flow types in shaders:
|
|
* block-based, like DXIL and SPIR-V, and structured, like D3DBC
|
|
* and TPF. The shader is detected as block-based when its first
|
|
* instruction, except for DCL_* and phases, is a LABEL. Currently
|
|
* we mandate that each shader is either purely block-based or
|
|
* purely structured. In principle we could allow structured
|
|
* constructs in a block, provided they are confined in a single
|
|
* block, but need for that hasn't arisen yet, so we don't. */
|
|
if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction))
|
|
{
|
|
if (instruction->handler_idx == VKD3DSIH_LABEL)
|
|
ctx->cf_type = CF_TYPE_BLOCKS;
|
|
else
|
|
ctx->cf_type = CF_TYPE_STRUCTURED;
|
|
}
|
|
|
|
if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction))
|
|
{
|
|
switch (instruction->handler_idx)
|
|
{
|
|
case VKD3DSIH_LABEL:
|
|
if (ctx->inside_block)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid LABEL instruction inside a block.");
|
|
ctx->inside_block = true;
|
|
break;
|
|
|
|
case VKD3DSIH_RET:
|
|
case VKD3DSIH_BRANCH:
|
|
case VKD3DSIH_SWITCH_MONOLITHIC:
|
|
if (!ctx->inside_block)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.",
|
|
instruction->handler_idx);
|
|
ctx->inside_block = false;
|
|
break;
|
|
|
|
default:
|
|
if (!ctx->inside_block)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.",
|
|
instruction->handler_idx);
|
|
break;
|
|
}
|
|
}
|
|
|
|
switch (instruction->handler_idx)
|
|
{
|
|
case VKD3DSIH_DCL_TEMPS:
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
if (ctx->dcl_temps_found)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_DUPLICATE_DCL_TEMPS, "Duplicate DCL_TEMPS instruction.");
|
|
if (instruction->declaration.count > ctx->program->temp_count)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DCL_TEMPS,
|
|
"Invalid DCL_TEMPS count %u, expected at most %u.",
|
|
instruction->declaration.count, ctx->program->temp_count);
|
|
ctx->dcl_temps_found = true;
|
|
break;
|
|
|
|
case VKD3DSIH_IF:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 1);
|
|
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
|
|
return;
|
|
ctx->blocks[ctx->depth++] = instruction->handler_idx;
|
|
break;
|
|
|
|
case VKD3DSIH_IFC:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 2);
|
|
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
|
|
return;
|
|
ctx->blocks[ctx->depth++] = VKD3DSIH_IF;
|
|
break;
|
|
|
|
case VKD3DSIH_ELSE:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block.");
|
|
else
|
|
ctx->blocks[ctx->depth - 1] = instruction->handler_idx;
|
|
break;
|
|
|
|
case VKD3DSIH_ENDIF:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
if (ctx->depth == 0 || (ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF && ctx->blocks[ctx->depth - 1] != VKD3DSIH_ELSE))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDIF instruction doesn't terminate IF/ELSE block.");
|
|
else
|
|
--ctx->depth;
|
|
break;
|
|
|
|
case VKD3DSIH_LOOP:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 2 : 0);
|
|
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
|
|
return;
|
|
ctx->blocks[ctx->depth++] = instruction->handler_idx;
|
|
break;
|
|
|
|
case VKD3DSIH_ENDLOOP:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_LOOP)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDLOOP instruction doesn't terminate LOOP block.");
|
|
else
|
|
--ctx->depth;
|
|
break;
|
|
|
|
case VKD3DSIH_REP:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 1);
|
|
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
|
|
return;
|
|
ctx->blocks[ctx->depth++] = instruction->handler_idx;
|
|
break;
|
|
|
|
case VKD3DSIH_ENDREP:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_REP)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDREP instruction doesn't terminate REP block.");
|
|
else
|
|
--ctx->depth;
|
|
break;
|
|
|
|
case VKD3DSIH_SWITCH:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 1);
|
|
if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks)))
|
|
return;
|
|
ctx->blocks[ctx->depth++] = instruction->handler_idx;
|
|
break;
|
|
|
|
case VKD3DSIH_ENDSWITCH:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_STRUCTURED);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_SWITCH)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ENDSWITCH instruction doesn't terminate SWITCH block.");
|
|
else
|
|
--ctx->depth;
|
|
break;
|
|
|
|
case VKD3DSIH_RET:
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 0);
|
|
break;
|
|
|
|
case VKD3DSIH_LABEL:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
vsir_validate_src_count(ctx, instruction, 1);
|
|
if (instruction->src_count >= 1 && !vsir_register_is_label(&instruction->src[0].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid register of type %#x in a LABEL instruction, expected LABEL.",
|
|
instruction->src[0].reg.type);
|
|
break;
|
|
|
|
case VKD3DSIH_BRANCH:
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
if (!vsir_validate_src_min_count(ctx, instruction, 1))
|
|
break;
|
|
if (vsir_register_is_label(&instruction->src[0].reg))
|
|
{
|
|
/* Unconditional branch: parameters are jump label,
|
|
* optional merge label, optional continue label. */
|
|
vsir_validate_src_max_count(ctx, instruction, 3);
|
|
|
|
for (i = 0; i < instruction->src_count; ++i)
|
|
{
|
|
if (!vsir_register_is_label(&instruction->src[i].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid register of type %#x in unconditional BRANCH instruction, expected LABEL.",
|
|
instruction->src[i].reg.type);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Conditional branch: parameters are condition, true
|
|
* jump label, false jump label, optional merge label,
|
|
* optional continue label. */
|
|
vsir_validate_src_min_count(ctx, instruction, 3);
|
|
vsir_validate_src_max_count(ctx, instruction, 5);
|
|
|
|
for (i = 1; i < instruction->src_count; ++i)
|
|
{
|
|
if (!vsir_register_is_label(&instruction->src[i].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid register of type %#x in conditional BRANCH instruction, expected LABEL.",
|
|
instruction->src[i].reg.type);
|
|
}
|
|
}
|
|
break;
|
|
|
|
case VKD3DSIH_SWITCH_MONOLITHIC:
|
|
{
|
|
unsigned int case_count;
|
|
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
|
|
vsir_validate_dst_count(ctx, instruction, 0);
|
|
/* Parameters are source, default label, merge label and
|
|
* then pairs of constant value and case label. */
|
|
if (!vsir_validate_src_min_count(ctx, instruction, 3))
|
|
break;
|
|
if (instruction->src_count % 2 != 1)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
|
|
"Invalid source count %u for a monolithic SWITCH instruction, it must be an odd number.",
|
|
instruction->src_count);
|
|
|
|
if (!vsir_register_is_label(&instruction->src[1].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid default label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
|
|
instruction->src[1].reg.type);
|
|
|
|
if (!vsir_register_is_label(&instruction->src[2].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid merge label register of type %#x in monolithic SWITCH instruction, expected LABEL.",
|
|
instruction->src[2].reg.type);
|
|
|
|
case_count = (instruction->src_count - 3) / 2;
|
|
|
|
for (i = 0; i < case_count; ++i)
|
|
{
|
|
unsigned int value_idx = 3 + 2 * i;
|
|
unsigned int label_idx = 3 + 2 * i + 1;
|
|
|
|
if (!register_is_constant(&instruction->src[value_idx].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid value register for case %zu of type %#x in monolithic SWITCH instruction, "
|
|
"expected IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
|
|
|
|
if (!vsir_register_is_label(&instruction->src[label_idx].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid label register for case %zu of type %#x in monolithic SWITCH instruction, "
|
|
"expected LABEL.", i, instruction->src[value_idx].reg.type);
|
|
}
|
|
break;
|
|
}
|
|
|
|
case VKD3DSIH_PHI:
|
|
{
|
|
unsigned int incoming_count;
|
|
|
|
vsir_validate_cf_type(ctx, instruction, CF_TYPE_BLOCKS);
|
|
vsir_validate_dst_count(ctx, instruction, 1);
|
|
vsir_validate_src_min_count(ctx, instruction, 2);
|
|
if (instruction->src_count % 2 != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT,
|
|
"Invalid source count %u for a PHI instruction, it must be an even number.",
|
|
instruction->src_count);
|
|
incoming_count = instruction->src_count / 2;
|
|
|
|
if (!register_is_ssa(&instruction->dst[0].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid destination of type %#x in PHI instruction, expected SSA.",
|
|
instruction->dst[0].reg.type);
|
|
|
|
if (instruction->dst[0].reg.dimension != VSIR_DIMENSION_SCALAR)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
|
|
"Invalid destination dimension %#x in PHI instruction, expected scalar.",
|
|
instruction->dst[0].reg.dimension);
|
|
|
|
if (instruction->dst[0].modifiers != VKD3DSPDM_NONE)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_MODIFIERS,
|
|
"Invalid modifiers %#x for the destination of a PHI instruction, expected none.",
|
|
instruction->dst[0].modifiers);
|
|
|
|
if (instruction->dst[0].shift != 0)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SHIFT,
|
|
"Invalid shift %#x for the destination of a PHI instruction, expected none.",
|
|
instruction->dst[0].shift);
|
|
|
|
for (i = 0; i < incoming_count; ++i)
|
|
{
|
|
unsigned int value_idx = 2 * i;
|
|
unsigned int label_idx = 2 * i + 1;
|
|
|
|
if (!register_is_constant(&instruction->src[value_idx].reg) && !register_is_ssa(&instruction->src[value_idx].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid value register for incoming %zu of type %#x in PHI instruction, "
|
|
"expected SSA, IMMCONST or IMMCONST64.", i, instruction->src[value_idx].reg.type);
|
|
|
|
if (instruction->src[value_idx].reg.dimension != VSIR_DIMENSION_SCALAR)
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DIMENSION,
|
|
"Invalid value dimension %#x for incoming %zu in PHI instruction, expected scalar.",
|
|
instruction->src[value_idx].reg.dimension, i);
|
|
|
|
if (!vsir_register_is_label(&instruction->src[label_idx].reg))
|
|
validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_REGISTER_TYPE,
|
|
"Invalid label register for case %zu of type %#x in PHI instruction, "
|
|
"expected LABEL.", i, instruction->src[value_idx].reg.type);
|
|
}
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags,
|
|
const char *source_name, struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
struct validation_context ctx =
|
|
{
|
|
.message_context = message_context,
|
|
.program = program,
|
|
.null_location = {.source_name = source_name},
|
|
.status = VKD3D_OK,
|
|
.phase = VKD3DSIH_INVALID,
|
|
};
|
|
unsigned int i;
|
|
|
|
if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION))
|
|
return VKD3D_OK;
|
|
|
|
if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps))))
|
|
goto fail;
|
|
|
|
if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas))))
|
|
goto fail;
|
|
|
|
for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx)
|
|
vsir_validate_instruction(&ctx);
|
|
|
|
ctx.invalid_instruction_idx = true;
|
|
|
|
if (ctx.depth != 0)
|
|
validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "%zu nested blocks were not closed.", ctx.depth);
|
|
|
|
if (ctx.inside_block)
|
|
validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Last block was not closed.");
|
|
|
|
for (i = 0; i < ctx.program->ssa_count; ++i)
|
|
{
|
|
struct validation_context_ssa_data *data = &ctx.ssas[i];
|
|
|
|
if ((data->write_mask | data->read_mask) != data->write_mask)
|
|
validator_error(&ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SSA_USAGE,
|
|
"SSA register %u has invalid read mask %#x, which is not a subset of the write mask %#x "
|
|
"at the point of definition.", i, data->read_mask, data->write_mask);
|
|
}
|
|
|
|
vkd3d_free(ctx.blocks);
|
|
vkd3d_free(ctx.temps);
|
|
vkd3d_free(ctx.ssas);
|
|
|
|
return ctx.status;
|
|
|
|
fail:
|
|
vkd3d_free(ctx.blocks);
|
|
vkd3d_free(ctx.temps);
|
|
vkd3d_free(ctx.ssas);
|
|
|
|
return VKD3D_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags,
|
|
const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
|
|
{
|
|
enum vkd3d_result result = VKD3D_OK;
|
|
|
|
if ((result = vsir_program_lower_instructions(program)) < 0)
|
|
return result;
|
|
|
|
if (program->shader_version.major >= 6)
|
|
{
|
|
if ((result = vsir_program_materialise_phi_ssas_to_temps(program)) < 0)
|
|
return result;
|
|
|
|
if ((result = lower_switch_to_if_ladder(program)) < 0)
|
|
return result;
|
|
|
|
if ((result = vsir_program_structurize(program, message_context)) < 0)
|
|
return result;
|
|
|
|
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
|
return result;
|
|
|
|
if ((result = vsir_program_materialize_undominated_ssas_to_temps(program, message_context)) < 0)
|
|
return result;
|
|
}
|
|
else
|
|
{
|
|
if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL)
|
|
{
|
|
if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0)
|
|
return result;
|
|
}
|
|
|
|
if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL)
|
|
{
|
|
if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0)
|
|
return result;
|
|
|
|
if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions,
|
|
&program->input_signature)) < 0)
|
|
return result;
|
|
}
|
|
|
|
if ((result = vsir_program_normalise_io_registers(program)) < 0)
|
|
return result;
|
|
|
|
if ((result = instruction_array_normalise_flat_constants(program)) < 0)
|
|
return result;
|
|
|
|
remove_dead_code(program);
|
|
|
|
if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0)
|
|
return result;
|
|
|
|
if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0)
|
|
return result;
|
|
}
|
|
|
|
if (TRACE_ON())
|
|
vkd3d_shader_trace(program);
|
|
|
|
if ((result = vsir_program_validate(program, config_flags,
|
|
compile_info->source_name, message_context)) < 0)
|
|
return result;
|
|
|
|
return result;
|
|
}
|