vkd3d-shader/hlsl: Allow prioritizing smaller writemasks when allocating signature elements.

For now this has no effect; it only becomes relevant once semantic
allocation is optimized.
Francisco Casas 2024-09-26 20:03:52 -03:00 committed by Henri Verbeet
parent d562b03c43
commit 88dd082160
Notes: Henri Verbeet 2024-10-22 20:54:15 +02:00
Approved-by: Elizabeth Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1193

@@ -4459,6 +4459,13 @@ struct register_allocator
     /* Total number of registers allocated so far. Used to declare sm4 temp count. */
     uint32_t reg_count;
 
+    /* Special flag so allocations that can share registers prioritize those
+     * that will result in smaller writemasks.
+     * For instance, a single-component allocation would prefer to share a register
+     * whose .xy components are already allocated (becoming .z) instead of a
+     * register whose .xyz components are already allocated (becoming .w). */
+    bool prioritize_smaller_writemasks;
 };
 
 static unsigned int get_available_writemask(const struct register_allocator *allocator,
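
For reference, writemasks here are 4-bit masks with x, y, z and w mapped to bits 0 through 3. The following is a minimal standalone sketch, not vkd3d code, of the situation the new comment describes: sharing a register whose .xy components are taken leaves .z as the lowest free component, while sharing one whose .xyz components are taken only leaves .w.

#include <stdio.h>

/* Standalone illustration only; the values are made up for the example. */
int main(void)
{
    unsigned int xy_used = 0x3, xyz_used = 0x7;
    unsigned int free_in_xy = 0xf & ~xy_used;   /* 0xc: .z and .w are free */
    unsigned int free_in_xyz = 0xf & ~xyz_used; /* 0x8: only .w is free */

    /* Isolate the lowest free component of each register. */
    printf("sharing the .xy register yields %#x (.z)\n", free_in_xy & -free_in_xy);
    printf("sharing the .xyz register yields %#x (.w)\n", free_in_xyz & -free_in_xyz);
    return 0;
}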
@@ -4517,23 +4524,31 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a
         unsigned int component_count, int mode, bool force_align)
 {
     unsigned int required_size = force_align ? 4 : reg_size;
+    unsigned int writemask = 0, pref;
     struct hlsl_reg ret = {0};
-    unsigned int writemask;
     uint32_t reg_idx;
 
     VKD3D_ASSERT(component_count <= reg_size);
 
-    for (reg_idx = 0;; ++reg_idx)
+    pref = allocator->prioritize_smaller_writemasks ? 4 : required_size;
+
+    for (; pref >= required_size; --pref)
     {
-        writemask = get_available_writemask(allocator, first_write, last_read, reg_idx, mode);
-
-        if (vkd3d_popcount(writemask) >= required_size)
+        for (reg_idx = 0; pref == required_size || reg_idx < allocator->reg_count; ++reg_idx)
         {
-            writemask = hlsl_combine_writemasks(writemask, (1u << reg_size) - 1);
-            break;
+            unsigned int available_writemask = get_available_writemask(allocator,
+                    first_write, last_read, reg_idx, mode);
+
+            if (vkd3d_popcount(available_writemask) >= pref)
+            {
+                writemask = hlsl_combine_writemasks(available_writemask, (1u << reg_size) - 1);
+                break;
+            }
         }
+
+        if (writemask)
+            break;
     }
 
     VKD3D_ASSERT(vkd3d_popcount(writemask) == reg_size);
     record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode);
     ret.id = reg_idx;
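
To see how the rewritten loop changes the outcome, here is a simplified standalone model of the search; the pick() helper, the alloc[] array and popcount4() are made up for the example and are not the vkd3d allocator. With prioritization off the element takes the first register with enough room; with prioritization on, the outer loop first demands a register with four free components and relaxes one component at a time, so the element ends up on the lowest possible components.

#include <stdio.h>

/* alloc[i] is the writemask already taken in register i; entries past
 * reg_count are zero (fully free), which stands in for
 * get_available_writemask() reporting untouched registers as entirely
 * available, so the final pass always terminates. */
static unsigned int popcount4(unsigned int m)
{
    unsigned int n = 0;

    for (; m; m >>= 1)
        n += m & 1;
    return n;
}

static void pick(const unsigned int *alloc, unsigned int reg_count,
        unsigned int required_size, int prioritize)
{
    unsigned int pref, reg_idx;

    /* Prefer registers with at least `pref` free components, starting from 4
     * (an empty register, yielding the lowest components and thus the
     * smallest writemask for the new element) and relaxing down to
     * required_size. */
    for (pref = prioritize ? 4 : required_size; pref >= required_size; --pref)
    {
        for (reg_idx = 0; pref == required_size || reg_idx < reg_count; ++reg_idx)
        {
            unsigned int available = 0xf & ~alloc[reg_idx];

            if (popcount4(available) >= pref)
            {
                printf("prioritize=%d: register %u, available mask %#x\n",
                        prioritize, reg_idx, available);
                return;
            }
        }
    }
}

int main(void)
{
    /* Register 0 has .xyz taken (only .w free); register 1 has .xy taken. */
    unsigned int alloc[8] = {0x7, 0x3};

    pick(alloc, 2, 1, 0); /* first fit: register 0, the element would land in .w */
    pick(alloc, 2, 1, 1); /* prioritized: register 1, the element would land in .z */
    return 0;
}

The `pref == required_size || reg_idx < reg_count` condition mirrors the real loop: passes stricter than required_size only consider registers that have already been touched, and only the final, loosest pass is allowed to claim a fresh register.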
@@ -5323,6 +5338,9 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_fun
     bool is_patch_constant_func = entry_func == ctx->patch_constant_func;
     struct hlsl_ir_var *var;
 
+    input_allocator.prioritize_smaller_writemasks = true;
+    output_allocator.prioritize_smaller_writemasks = true;
+
     LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
     {
         if (var->is_input_semantic