mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-01-28 13:05:02 -08:00
vkd3d-shader/hlsl: Add special allocation rules for PRIMID, RTINDEX, and VPINDEX.
These system values are bound to the same allocation rules as other semantics: they can share registers with other semantics with the same interpolation mode and they prefer forming shorter writemasks. However, for some reason, these don't allow further semantics to share the same register once allocated, except among themselves.
This commit is contained in:
parent
2c0773c9ad
commit
ad5377f995
Notes:
Henri Verbeet
2024-11-24 00:09:47 +01:00
Approved-by: Elizabeth Figura (@zfigura) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1260
@ -4496,6 +4496,9 @@ struct register_allocator
|
||||
|
||||
/* Two allocations with different modes can't share the same register. */
|
||||
int mode;
|
||||
/* If an allocation is VIP, no new allocations can be made in the
|
||||
* register unless they are VIP as well. */
|
||||
bool vip;
|
||||
} *allocations;
|
||||
size_t count, capacity;
|
||||
|
||||
@ -4515,7 +4518,7 @@ struct register_allocator
|
||||
};
|
||||
|
||||
static unsigned int get_available_writemask(const struct register_allocator *allocator,
|
||||
unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode)
|
||||
unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip)
|
||||
{
|
||||
unsigned int writemask = VKD3DSP_WRITEMASK_ALL;
|
||||
size_t i;
|
||||
@ -4534,6 +4537,8 @@ static unsigned int get_available_writemask(const struct register_allocator *all
|
||||
writemask &= ~allocation->writemask;
|
||||
if (allocation->mode != mode)
|
||||
writemask = 0;
|
||||
if (allocation->vip && !vip)
|
||||
writemask = 0;
|
||||
}
|
||||
|
||||
if (!writemask)
|
||||
@ -4544,7 +4549,7 @@ static unsigned int get_available_writemask(const struct register_allocator *all
|
||||
}
|
||||
|
||||
static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx,
|
||||
unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode)
|
||||
unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip)
|
||||
{
|
||||
struct allocation *allocation;
|
||||
|
||||
@ -4558,16 +4563,25 @@ static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *a
|
||||
allocation->first_write = first_write;
|
||||
allocation->last_read = last_read;
|
||||
allocation->mode = mode;
|
||||
allocation->vip = vip;
|
||||
|
||||
allocator->reg_count = max(allocator->reg_count, reg_idx + 1);
|
||||
}
|
||||
|
||||
/* reg_size is the number of register components to be reserved, while component_count is the number
|
||||
* of components for the register's writemask. In SM1, floats and vectors allocate the whole
|
||||
* register, even if they don't use it completely. */
|
||||
/* Allocates a register (or some components of it) within the register allocator.
|
||||
* 'reg_size' is the number of register components to be reserved.
|
||||
* 'component_count' is the number of components for the hlsl_reg's
|
||||
* writemask, which can be smaller than 'reg_size'. For instance, sm1
|
||||
* floats and vectors allocate the whole register even if they are not
|
||||
* using all components.
|
||||
* 'mode' can be provided to avoid allocating on a register that already has an
|
||||
* allocation with a different mode.
|
||||
* 'force_align' can be used so that the allocation always starts in '.x'.
|
||||
* 'vip' can be used so that no new allocations can be made in the given register
|
||||
* unless they are 'vip' as well. */
|
||||
static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator,
|
||||
unsigned int first_write, unsigned int last_read, unsigned int reg_size,
|
||||
unsigned int component_count, int mode, bool force_align)
|
||||
unsigned int component_count, int mode, bool force_align, bool vip)
|
||||
{
|
||||
struct hlsl_reg ret = {.allocation_size = 1, .allocated = true};
|
||||
unsigned int required_size = force_align ? 4 : reg_size;
|
||||
@ -4581,7 +4595,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
|
||||
for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx)
|
||||
{
|
||||
unsigned int available_writemask = get_available_writemask(allocator,
|
||||
first_write, last_read, reg_idx, mode);
|
||||
first_write, last_read, reg_idx, mode, vip);
|
||||
|
||||
if (vkd3d_popcount(available_writemask) >= pref)
|
||||
{
|
||||
@ -4591,7 +4605,8 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
|
||||
ret.id = reg_idx;
|
||||
ret.writemask = hlsl_combine_writemasks(writemask,
|
||||
vkd3d_write_mask_from_component_count(component_count));
|
||||
record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode);
|
||||
|
||||
record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip);
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
@ -4600,13 +4615,14 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
|
||||
ret.id = allocator->reg_count;
|
||||
ret.writemask = vkd3d_write_mask_from_component_count(component_count);
|
||||
record_allocation(ctx, allocator, allocator->reg_count,
|
||||
vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode);
|
||||
vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip);
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Allocate a register with writemask, while reserving reg_writemask. */
|
||||
static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator,
|
||||
unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask, int mode)
|
||||
static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx,
|
||||
struct register_allocator *allocator, unsigned int first_write, unsigned int last_read,
|
||||
uint32_t reg_writemask, uint32_t writemask, int mode, bool vip)
|
||||
{
|
||||
struct hlsl_reg ret = {0};
|
||||
uint32_t reg_idx;
|
||||
@ -4616,11 +4632,11 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct
|
||||
for (reg_idx = 0;; ++reg_idx)
|
||||
{
|
||||
if ((get_available_writemask(allocator, first_write, last_read,
|
||||
reg_idx, mode) & reg_writemask) == reg_writemask)
|
||||
reg_idx, mode, vip) & reg_writemask) == reg_writemask)
|
||||
break;
|
||||
}
|
||||
|
||||
record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode);
|
||||
record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip);
|
||||
|
||||
ret.id = reg_idx;
|
||||
ret.allocation_size = 1;
|
||||
@ -4630,7 +4646,7 @@ static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct
|
||||
}
|
||||
|
||||
static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write,
|
||||
unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode)
|
||||
unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip)
|
||||
{
|
||||
unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1;
|
||||
unsigned int writemask;
|
||||
@ -4638,18 +4654,18 @@ static bool is_range_available(const struct register_allocator *allocator, unsig
|
||||
|
||||
for (i = 0; i < (reg_size / 4); ++i)
|
||||
{
|
||||
writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode);
|
||||
writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip);
|
||||
if (writemask != VKD3DSP_WRITEMASK_ALL)
|
||||
return false;
|
||||
}
|
||||
writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode);
|
||||
writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip);
|
||||
if ((writemask & last_reg_mask) != last_reg_mask)
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator,
|
||||
unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode)
|
||||
unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip)
|
||||
{
|
||||
struct hlsl_reg ret = {0};
|
||||
uint32_t reg_idx;
|
||||
@ -4657,15 +4673,15 @@ static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allo
|
||||
|
||||
for (reg_idx = 0;; ++reg_idx)
|
||||
{
|
||||
if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode))
|
||||
if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip))
|
||||
break;
|
||||
}
|
||||
|
||||
for (i = 0; i < reg_size / 4; ++i)
|
||||
record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode);
|
||||
record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip);
|
||||
if (reg_size % 4)
|
||||
record_allocation(ctx, allocator, reg_idx + (reg_size / 4),
|
||||
(1u << (reg_size % 4)) - 1, first_write, last_read, mode);
|
||||
(1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip);
|
||||
|
||||
ret.id = reg_idx;
|
||||
ret.allocation_size = align(reg_size, 4) / 4;
|
||||
@ -4681,9 +4697,9 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx,
|
||||
/* FIXME: We could potentially pack structs or arrays more efficiently... */
|
||||
|
||||
if (type->class <= HLSL_CLASS_VECTOR)
|
||||
return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false);
|
||||
return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx, 0, false, false);
|
||||
else
|
||||
return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0);
|
||||
return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false);
|
||||
}
|
||||
|
||||
static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type)
|
||||
@ -4861,8 +4877,8 @@ static void allocate_instr_temp_register(struct hlsl_ctx *ctx,
|
||||
}
|
||||
|
||||
if (reg_writemask)
|
||||
instr->reg = allocate_register_with_masks(ctx, allocator,
|
||||
instr->index, instr->last_read, reg_writemask, dst_writemask, 0);
|
||||
instr->reg = allocate_register_with_masks(ctx, allocator, instr->index,
|
||||
instr->last_read, reg_writemask, dst_writemask, 0, false);
|
||||
else
|
||||
instr->reg = allocate_numeric_registers_for_type(ctx, allocator,
|
||||
instr->index, instr->last_read, instr->data_type);
|
||||
@ -5183,14 +5199,15 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
|
||||
{
|
||||
if (i < bind_count)
|
||||
{
|
||||
if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i, 0) != VKD3DSP_WRITEMASK_ALL)
|
||||
if (get_available_writemask(&allocator_used, 1, UINT_MAX,
|
||||
reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL)
|
||||
{
|
||||
hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
|
||||
"Overlapping register() reservations on 'c%u'.", reg_idx + i);
|
||||
}
|
||||
record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0);
|
||||
record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false);
|
||||
}
|
||||
record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0);
|
||||
record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false);
|
||||
}
|
||||
|
||||
var->regs[HLSL_REGSET_NUMERIC].id = reg_idx;
|
||||
@ -5213,7 +5230,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
|
||||
|
||||
if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
|
||||
{
|
||||
var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0);
|
||||
var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false);
|
||||
TRACE("Allocated %s to %s.\n", var->name,
|
||||
debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
|
||||
}
|
||||
@ -5256,7 +5273,8 @@ static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun
|
||||
var = entry_func->parameters.vars[i];
|
||||
if (var->is_output_semantic)
|
||||
{
|
||||
record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL, var->first_write, var->last_read, 0);
|
||||
record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL,
|
||||
var->first_write, var->last_read, 0, false);
|
||||
break;
|
||||
}
|
||||
}
|
||||
@ -5313,6 +5331,7 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
|
||||
|
||||
enum vkd3d_shader_register_type type;
|
||||
struct vkd3d_shader_version version;
|
||||
bool vip_allocation = false;
|
||||
uint32_t reg;
|
||||
bool builtin;
|
||||
|
||||
@ -5365,6 +5384,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
|
||||
* domains, it is allocated as if it was 'float[1]'. */
|
||||
var->force_align = true;
|
||||
}
|
||||
|
||||
if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX
|
||||
|| semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX
|
||||
|| semantic == VKD3D_SHADER_SV_PRIMITIVE_ID)
|
||||
vip_allocation = true;
|
||||
}
|
||||
|
||||
if (builtin)
|
||||
@ -5378,8 +5402,8 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var
|
||||
? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
|
||||
unsigned int reg_size = optimize ? var->data_type->dimx : 4;
|
||||
|
||||
var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1,
|
||||
UINT_MAX, reg_size, var->data_type->dimx, mode, var->force_align);
|
||||
var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX,
|
||||
reg_size, var->data_type->dimx, mode, var->force_align, vip_allocation);
|
||||
|
||||
TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v',
|
||||
var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode);
|
||||
|
@ -2818,9 +2818,9 @@ static void test_signature_reflection(void)
|
||||
{ps12_source, "ps_4_0", false, ps12_inputs, ARRAY_SIZE(ps12_inputs), ps_outputs_simple, ARRAY_SIZE(ps_outputs_simple)},
|
||||
{vs4_source, "vs_4_0", false, vs4_inputs, ARRAY_SIZE(vs4_inputs), vs4_outputs, ARRAY_SIZE(vs4_outputs)},
|
||||
{vs5_source, "vs_4_0", false, vs5_inputs, ARRAY_SIZE(vs5_inputs), vs5_outputs, ARRAY_SIZE(vs5_outputs)},
|
||||
{vs6_source, "vs_4_0", false, vs6_inputs, ARRAY_SIZE(vs6_inputs), vs6_outputs, ARRAY_SIZE(vs6_outputs), NULL, 0, true},
|
||||
{ps13_source, "ps_4_0", false, ps13_inputs, ARRAY_SIZE(ps13_inputs), ps_outputs_simple, ARRAY_SIZE(ps_outputs_simple), NULL, 0, true},
|
||||
{ps14_source, "ps_4_0", false, ps14_inputs, ARRAY_SIZE(ps14_inputs), ps_outputs_simple, ARRAY_SIZE(ps_outputs_simple), NULL, 0, true},
|
||||
{vs6_source, "vs_4_0", false, vs6_inputs, ARRAY_SIZE(vs6_inputs), vs6_outputs, ARRAY_SIZE(vs6_outputs)},
|
||||
{ps13_source, "ps_4_0", false, ps13_inputs, ARRAY_SIZE(ps13_inputs), ps_outputs_simple, ARRAY_SIZE(ps_outputs_simple)},
|
||||
{ps14_source, "ps_4_0", false, ps14_inputs, ARRAY_SIZE(ps14_inputs), ps_outputs_simple, ARRAY_SIZE(ps_outputs_simple)},
|
||||
{hs1_source, "hs_5_0", false, NULL, 0, hs1_outputs, ARRAY_SIZE(hs1_outputs), hs1_patch_constants, ARRAY_SIZE(hs1_patch_constants)},
|
||||
{hs2_source, "hs_5_0", false, NULL, 0, hs2_outputs, ARRAY_SIZE(hs2_outputs), hs2_patch_constants, ARRAY_SIZE(hs2_patch_constants)},
|
||||
{hs3_source, "hs_5_0", false, NULL, 0, hs3_outputs, ARRAY_SIZE(hs3_outputs), hs3_patch_constants, ARRAY_SIZE(hs3_patch_constants)},
|
||||
|
Loading…
x
Reference in New Issue
Block a user