vkd3d-shader/hlsl: Implement storing to a swizzled matrix.

Approved-by: Elizabeth Figura (@zfigura) Approved-by: Henri Verbeet (@hverbeet) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/895
2025-09-12 18:50:22 -07:00 · 2024-05-29 16:22:41 +01:00 · 2024-06-20 12:44:01 +02:00
parent acc9d79fbb
commit ccb6150aab
2 changed files with 111 additions and 13 deletions
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -1878,12 +1878,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned
    return true;
 }
 /* Convert a matrix swizzle used as an assignment target into a per-cell writemask
  * plus the swizzle to apply to the incoming (rhs) vector.
  *
  * On input, *swizzle holds up to four 8-bit components; each component is (from the
  * LSB) 4 bits of X followed by 4 bits of Y. Component n is only considered if bit n
  * of *writemask is set.
  *
  * On success, returns true and stores:
  *   *swizzle   - 2 bits per written cell, visited in ascending (x + y * 4) order; each
  *                field is the index of the enabled component that targets that cell.
  *   *writemask - 16-bit mask with bit (x + y * 4) set for every written cell.
  *   *ret_width - the number of enabled components.
  *
  * Returns false, leaving all three outputs untouched, if two enabled components
  * target the same cell, or if an enabled component has X or Y outside 0..3. */
 static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width)
 {
    /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y.
     * components are indexed by their sources. i.e. the first component comes from the first
     * component of the rhs. */
    unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0;
    /* First, we filter the swizzle to remove components that aren't enabled by writemask. */
    for (i = 0; i < 4; ++i)
    {
        if (*writemask & (1u << i))
        {
            unsigned int s = (*swizzle >> (i * 8)) & 0xff;
            unsigned int x = s & 0xf, y = (s >> 4) & 0xf;
            unsigned int idx = x + y * 4;
            /* Each nibble can encode 0..15, but the cell mask below only has 4x4 bits.
             * Without this check an out-of-range Y makes the shift undefined, and an
             * out-of-range X silently aliases a different cell. */
            if (x >= 4 || y >= 4)
                return false;
            new_swizzle |= s << (bit++ * 8);
            /* A cell may be written at most once. */
            if (new_writemask & (1u << idx))
                return false;
            new_writemask |= 1u << idx;
        }
    }
    width = bit;
    /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the
     * incoming vector. */
    bit = 0;
    for (i = 0; i < 16; ++i)
    {
        for (j = 0; j < width; ++j)
        {
            unsigned int s = (new_swizzle >> (j * 8)) & 0xff;
            unsigned int x = s & 0xf, y = (s >> 4) & 0xf;
            unsigned int idx = x + y * 4;
            /* Cells are unique, so at most one j matches any given i. */
            if (idx == i)
                inverted |= j << (bit++ * 2);
        }
    }
    *swizzle = inverted;
    *writemask = new_writemask;
    *ret_width = width;
    return true;
 }
 static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs,
        enum parse_assign_op assign_op, struct hlsl_ir_node *rhs)
 {
    struct hlsl_type *lhs_type = lhs->data_type;
    struct hlsl_ir_node *copy;
-    unsigned int writemask = 0;
+    unsigned int writemask = 0, width = 0;
    bool matrix_writemask = false;
    if (assign_op == ASSIGN_OP_SUB)
    {
@@ -1901,7 +1946,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
    }
    if (hlsl_is_numeric_type(lhs_type))
    {
        writemask = (1 << lhs_type->dimx) - 1;
        width = lhs_type->dimx;
    }
    if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc)))
        return NULL;
@@ -1918,12 +1966,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
            struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs);
            struct hlsl_ir_node *new_swizzle;
            uint32_t s = swizzle->swizzle;
            unsigned int width;
-            if (lhs->data_type->class == HLSL_CLASS_MATRIX)
+            assert(!matrix_writemask);
                hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask.");
-            if (!invert_swizzle(&s, &writemask, &width))
+            if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX)
            {
                if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX)
                {
                    hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle.");
                    return NULL;
                }
                if (!invert_swizzle_matrix(&s, &writemask, &width))
                {
                    hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix.");
                    return NULL;
                }
                matrix_writemask = true;
            }
            else if (!invert_swizzle(&s, &writemask, &width))
            {
                hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask.");
                return NULL;
@@ -1971,7 +2031,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
        dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim);
-        if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1))
+        if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy)
            hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK,
                    "Resource store expressions must write to all components.");
@@ -1987,12 +2047,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo
        hlsl_block_add_instr(block, store);
        hlsl_cleanup_deref(&resource_deref);
    }
    else if (matrix_writemask)
    {
        struct hlsl_deref deref;
        unsigned int i, j, k = 0;
        hlsl_init_deref_from_index_chain(ctx, &deref, lhs);
        for (i = 0; i < lhs->data_type->dimy; ++i)
        {
            for (j = 0; j < lhs->data_type->dimx; ++j)
            {
                struct hlsl_ir_node *load;
                struct hlsl_block store_block;
                const unsigned int idx = i * 4 + j;
                const unsigned int component = i * lhs->data_type->dimx + j;
                if (!(writemask & (1 << idx)))
                    continue;
                if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc)))
                {
                    hlsl_cleanup_deref(&deref);
                    return NULL;
                }
                if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load))
                {
                    hlsl_cleanup_deref(&deref);
                    return NULL;
                }
                hlsl_block_add_block(block, &store_block);
            }
        }
        hlsl_cleanup_deref(&deref);
    }
    else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs)))
    {
        struct hlsl_ir_index *row = hlsl_ir_index(lhs);
        struct hlsl_ir_node *mat = row->val.node;
        unsigned int i, k = 0;
        assert(!matrix_writemask);
        for (i = 0; i < mat->data_type->dimx; ++i)
        {
            struct hlsl_ir_node *cell, *load, *store, *c;
--- a/tests/hlsl/swizzle-matrix.shader_test
+++ b/tests/hlsl/swizzle-matrix.shader_test
@@ -155,7 +155,7 @@ todo(glsl) draw quad
 probe (0, 0) rgba (40.0, 40.0, 40.0, 40.0)
-[pixel shader todo]
+[pixel shader]
 float3 a;
 float4 main() : sv_target
@@ -169,11 +169,11 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 20 30 40 -1
-todo(sm<6) draw quad
+todo(glsl) draw quad
-todo(sm<6) probe (0,0) rgba (10.0, 20.0, 30.0, 40.0)
+probe (0,0) rgba (10.0, 20.0, 30.0, 40.0)
-[pixel shader todo]
+[pixel shader]
 float3 a;
 float4 main() : sv_target
@@ -187,12 +187,12 @@ float4 main() : sv_target
 [test]
 uniform 0 float4 20 30 80 -1
-todo(sm<6) draw quad
+todo(glsl) draw quad
-todo(sm<6) probe (0,0) rgba (80.0, 30.0, 20.0, 10.0)
+probe (0,0) rgba (80.0, 30.0, 20.0, 10.0)
 % Cannot repeat components when assigning to a swizzle.
-[pixel shader fail todo]
+[pixel shader fail]
 float4 main() : sv_target
 {
    float2x2 mat = {1, 2, 3, 4};