diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y index a2c89b15..9c75c87d 100644 --- a/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d-shader/hlsl.y @@ -1878,12 +1878,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned return true; } +static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) +{ + /* On input, swizzle holds 8 bits per component: (from LSB) 4 bits X, then 4 bits Y; writemask has one bit per + * component. Components are indexed by their sources, i.e. the first component comes from the first component + * of the rhs. On success, writemask instead has bit (X + Y * 4) set for each matrix element written. */ + unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; + + /* First, filter out components not enabled by writemask; fail if two components hit the same element. */ + for (i = 0; i < 4; ++i) + { + if (*writemask & (1 << i)) + { + unsigned int s = (*swizzle >> (i * 8)) & 0xff; + unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int idx = x + y * 4; + new_swizzle |= s << (bit++ * 8); + if (new_writemask & (1 << idx)) + return false; + new_writemask |= 1 << idx; + } + } + width = bit; + + /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the + * incoming vector; entry n selects the component that targets the n-th written element (by X + Y * 4). 
*/ + bit = 0; + for (i = 0; i < 16; ++i) + { + for (j = 0; j < width; ++j) + { + unsigned int s = (new_swizzle >> (j * 8)) & 0xff; + unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int idx = x + y * 4; + if (idx == i) + inverted |= j << (bit++ * 2); + } + } + + *swizzle = inverted; + *writemask = new_writemask; + *ret_width = width; + return true; +} + static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; struct hlsl_ir_node *copy; - unsigned int writemask = 0; + unsigned int writemask = 0, width = 0; + bool matrix_writemask = false; if (assign_op == ASSIGN_OP_SUB) { @@ -1901,7 +1946,10 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo } if (hlsl_is_numeric_type(lhs_type)) + { writemask = (1 << lhs_type->dimx) - 1; + width = lhs_type->dimx; + } if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) return NULL; @@ -1918,12 +1966,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); struct hlsl_ir_node *new_swizzle; uint32_t s = swizzle->swizzle; - unsigned int width; - if (lhs->data_type->class == HLSL_CLASS_MATRIX) - hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); + assert(!matrix_writemask); - if (!invert_swizzle(&s, &writemask, &width)) + if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) + { + if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) + { + hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); + return NULL; + } + if (!invert_swizzle_matrix(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); + return NULL; + } + matrix_writemask = true; + } + else if (!invert_swizzle(&s, 
&writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); return NULL; @@ -1971,7 +2031,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) + if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components."); @@ -1987,12 +2047,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); } + else if (matrix_writemask) /* Matrix swizzle: store the written elements one component at a time. */ + { + struct hlsl_deref deref; + unsigned int i, j, k = 0; /* k counts the rhs components consumed so far. */ + + hlsl_init_deref_from_index_chain(ctx, &deref, lhs); + + for (i = 0; i < lhs->data_type->dimy; ++i) + { + for (j = 0; j < lhs->data_type->dimx; ++j) + { + struct hlsl_ir_node *load; + struct hlsl_block store_block; + const unsigned int idx = i * 4 + j; /* Bit index in the matrix writemask. */ + const unsigned int component = i * lhs->data_type->dimx + j; + + if (!(writemask & (1 << idx))) + continue; + + if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + + if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + hlsl_block_add_block(block, &store_block); + } + } + + hlsl_cleanup_deref(&deref); + } else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) { struct hlsl_ir_index *row = hlsl_ir_index(lhs); struct hlsl_ir_node *mat = row->val.node; unsigned int i, k = 0; + assert(!matrix_writemask); + for (i = 0; i < mat->data_type->dimx; ++i) { struct hlsl_ir_node *cell, *load, *store, *c; diff --git a/tests/hlsl/swizzle-matrix.shader_test b/tests/hlsl/swizzle-matrix.shader_test 
index 7be73a08..070322fe 100644 --- a/tests/hlsl/swizzle-matrix.shader_test +++ b/tests/hlsl/swizzle-matrix.shader_test @@ -155,7 +155,7 @@ todo(glsl) draw quad probe (0, 0) rgba (40.0, 40.0, 40.0, 40.0) -[pixel shader todo] +[pixel shader] float3 a; float4 main() : sv_target @@ -169,11 +169,11 @@ float4 main() : sv_target [test] uniform 0 float4 20 30 40 -1 -todo(sm<6) draw quad -todo(sm<6) probe (0,0) rgba (10.0, 20.0, 30.0, 40.0) +todo(glsl) draw quad +probe (0,0) rgba (10.0, 20.0, 30.0, 40.0) -[pixel shader todo] +[pixel shader] float3 a; float4 main() : sv_target @@ -187,12 +187,12 @@ float4 main() : sv_target [test] uniform 0 float4 20 30 80 -1 -todo(sm<6) draw quad -todo(sm<6) probe (0,0) rgba (80.0, 30.0, 20.0, 10.0) +todo(glsl) draw quad +probe (0,0) rgba (80.0, 30.0, 20.0, 10.0) % Cannot repeat components when assigning to a swizzle. -[pixel shader fail todo] +[pixel shader fail] float4 main() : sv_target { float2x2 mat = {1, 2, 3, 4};