vkd3d-shader/hlsl: Rewrite the register allocator to allow allocating in multiple passes.

We will need this in order to allocate some "special" registers: ps_1_* output, sincos output, etc.
This commit is contained in:
Zebediah Figura 2023-04-05 14:09:16 -05:00 committed by Alexandre Julliard
parent 71d8ff85c6
commit b2959739ed
Notes: Alexandre Julliard 2023-05-03 22:38:49 +02:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/168

View File

@ -2745,43 +2745,61 @@ static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl
struct register_allocator struct register_allocator
{ {
size_t size; size_t count, capacity;
uint32_t reg_count;
struct /* Highest register index that has been allocated.
* Used to declare sm4 temp count. */
uint32_t max_reg;
struct allocation
{ {
/* 0 if not live yet. */ uint32_t reg;
unsigned int last_read; unsigned int writemask;
} *regs; unsigned int first_write, last_read;
} *allocations;
}; };
static unsigned int get_available_writemask(struct register_allocator *allocator, static unsigned int get_available_writemask(const struct register_allocator *allocator,
unsigned int first_write, unsigned int component_idx, unsigned int reg_size) unsigned int first_write, unsigned int last_read, uint32_t reg_idx)
{ {
unsigned int i, writemask = 0, count = 0; unsigned int writemask = VKD3DSP_WRITEMASK_ALL;
size_t i;
for (i = 0; i < 4; ++i) for (i = 0; i < allocator->count; ++i)
{ {
if (allocator->regs[component_idx + i].last_read <= first_write) const struct allocation *allocation = &allocator->allocations[i];
{
writemask |= 1u << i; /* We do not overlap if first write == last read:
if (++count == reg_size) * this is the case where we are allocating the result of that
* expression, e.g. "add r0, r0, r1". */
if (allocation->reg == reg_idx
&& first_write < allocation->last_read && last_read > allocation->first_write)
writemask &= ~allocation->writemask;
if (!writemask)
break;
}
return writemask; return writemask;
} }
}
return 0; static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator,
} uint32_t reg_idx, unsigned int writemask, unsigned int first_write, unsigned int last_read)
static bool resize_liveness(struct hlsl_ctx *ctx, struct register_allocator *allocator, size_t new_count)
{ {
size_t old_capacity = allocator->size; struct allocation *allocation;
if (!hlsl_array_reserve(ctx, (void **)&allocator->regs, &allocator->size, new_count, sizeof(*allocator->regs))) if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity,
return false; allocator->count + 1, sizeof(*allocator->allocations)))
return;
if (allocator->size > old_capacity) allocation = &allocator->allocations[allocator->count++];
memset(allocator->regs + old_capacity, 0, (allocator->size - old_capacity) * sizeof(*allocator->regs)); allocation->reg = reg_idx;
return true; allocation->writemask = writemask;
allocation->first_write = first_write;
allocation->last_read = last_read;
allocator->max_reg = max(allocator->max_reg, reg_idx);
} }
/* reg_size is the number of register components to be reserved, while component_count is the number /* reg_size is the number of register components to be reserved, while component_count is the number
@ -2791,42 +2809,39 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
unsigned int first_write, unsigned int last_read, unsigned int reg_size, unsigned int first_write, unsigned int last_read, unsigned int reg_size,
unsigned int component_count) unsigned int component_count)
{ {
unsigned int component_idx, writemask, i;
struct hlsl_reg ret = {0}; struct hlsl_reg ret = {0};
unsigned int writemask;
uint32_t reg_idx;
assert(component_count <= reg_size); assert(component_count <= reg_size);
for (component_idx = 0; component_idx < allocator->size; component_idx += 4) for (reg_idx = 0;; ++reg_idx)
{ {
if ((writemask = get_available_writemask(allocator, first_write, component_idx, reg_size))) writemask = get_available_writemask(allocator, first_write, last_read, reg_idx);
if (vkd3d_popcount(writemask) >= reg_size)
{
writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1);
break; break;
} }
if (component_idx == allocator->size)
{
if (!resize_liveness(ctx, allocator, component_idx + 4))
return ret;
writemask = (1u << reg_size) - 1;
} }
for (i = 0; i < 4; ++i)
{ record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read);
if (writemask & (1u << i))
allocator->regs[component_idx + i].last_read = last_read; ret.id = reg_idx;
}
ret.id = component_idx / 4;
ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1); ret.writemask = hlsl_combine_writemasks(writemask, (1u << component_count) - 1);
ret.allocated = true; ret.allocated = true;
allocator->reg_count = max(allocator->reg_count, ret.id + 1);
return ret; return ret;
} }
static bool is_range_available(struct register_allocator *allocator, unsigned int first_write, static bool is_range_available(const struct register_allocator *allocator,
unsigned int component_idx, unsigned int reg_size) unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size)
{ {
unsigned int i; uint32_t i;
for (i = 0; i < reg_size; i += 4) for (i = 0; i < (reg_size / 4); ++i)
{ {
if (!get_available_writemask(allocator, first_write, component_idx + i, 4)) if (get_available_writemask(allocator, first_write, last_read, reg_idx + i) != VKD3DSP_WRITEMASK_ALL)
return false; return false;
} }
return true; return true;
@ -2835,23 +2850,21 @@ static bool is_range_available(struct register_allocator *allocator, unsigned in
static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator, static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator,
unsigned int first_write, unsigned int last_read, unsigned int reg_size) unsigned int first_write, unsigned int last_read, unsigned int reg_size)
{ {
unsigned int i, component_idx;
struct hlsl_reg ret = {0}; struct hlsl_reg ret = {0};
uint32_t reg_idx;
unsigned int i;
for (component_idx = 0; component_idx < allocator->size; component_idx += 4) for (reg_idx = 0;; ++reg_idx)
{ {
if (is_range_available(allocator, first_write, component_idx, if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size))
min(reg_size, allocator->size - component_idx)))
break; break;
} }
if (!resize_liveness(ctx, allocator, component_idx + reg_size))
return ret;
for (i = 0; i < reg_size; ++i) for (i = 0; i < reg_size / 4; ++i)
allocator->regs[component_idx + i].last_read = last_read; record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read);
ret.id = component_idx / 4;
ret.id = reg_idx;
ret.allocated = true; ret.allocated = true;
allocator->reg_count = max(allocator->reg_count, ret.id + align(reg_size, 4));
return ret; return ret;
} }
@ -3075,7 +3088,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi
} }
} }
vkd3d_free(allocator.regs); vkd3d_free(allocator.allocations);
} }
/* Simple greedy temporary register allocation pass that just assigns a unique /* Simple greedy temporary register allocation pass that just assigns a unique
@ -3086,8 +3099,8 @@ static void allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functio
{ {
struct register_allocator allocator = {0}; struct register_allocator allocator = {0};
allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator); allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
ctx->temp_count = allocator.reg_count; ctx->temp_count = allocator.max_reg + 1;
vkd3d_free(allocator.regs); vkd3d_free(allocator.allocations);
} }
static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output) static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, unsigned int *counter, bool output)