vkd3d-shader/hlsl: Replace loads with constants in copy prop.

If a hlsl_ir_load loads a variable whose components are stored from different
instructions, copy propagation doesn't replace it.

But if all these instructions are constants (which is currently the case
for value constructors), the load can be replaced with a constant value.
Some other instructions expect a constant there, e.g. texel_offsets when
using aoffimmi modifiers.

For instance, this shader:

```
sampler s;
Texture2D t;

float4 main() : sv_target
{
    return t.Gather(s, float2(0.6, 0.6), int2(0, 0));
}
```

results in the following IR before applying the patch:
```
  float | 6.00000024e-01
  float | 6.00000024e-01
   uint | 0
        | = (<constructor-2>[@4].x @2)
   uint | 1
        | = (<constructor-2>[@6].x @3)
 float2 | <constructor-2>
    int | 0
    int | 0
   uint | 0
        | = (<constructor-5>[@11].x @9)
   uint | 1
        | = (<constructor-5>[@13].x @10)
   int2 | <constructor-5>
 float4 | gather_red(resource = t, sampler = s, coords = @8, offset = @15)
        | return
        | = (<output-sv_target0> @16)
```

and this IR afterwards:
```
 float2 | {6.00000024e-01 6.00000024e-01 }
   int2 | {0 0 }
 float4 | gather_red(resource = t, sampler = s, coords = @2, offset = @3)
        | return
        | = (<output-sv_target0> @4)
```
This commit is contained in:
Francisco Casas 2022-11-17 17:49:28 -03:00 committed by Alexandre Julliard
parent 8c2b8ff245
commit c2a7a40d3a
Notes: Alexandre Julliard 2023-01-24 22:27:58 +01:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Zebediah Figura (@zfigura)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/51
5 changed files with 110 additions and 25 deletions

View File

@ -543,6 +543,52 @@ static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, v
return false;
}
/*
* Copy propagation. The basic idea is to recognize instruction sequences of the
* form:
*
* 2: <any instruction>
* 3: v = @2
* 4: load(v)
*
* and replace the load (@4) with the original instruction (@2).
* This works for multiple components, even if they're written using separate
* store instructions, as long as the rhs is the same in every case. This basic
* detection is implemented by copy_propagation_replace_with_single_instr().
*
* We use the same infrastructure to implement a more specialized
* transformation. We recognize sequences of the form:
*
* 2: 123
* 3: var.x = @2
* 4: 345
* 5: var.y = @4
* 6: load(var.xy)
*
* where the components read by the load (@6) come from different sources, all
* of which are constants, and transform it into a single constant vector. This
* latter pass is done by copy_propagation_replace_with_constant_vector().
*
* This is a specialized form of vectorization, and raises the question: why
* does the load need to be involved? Can we just vectorize the stores into a
* single instruction, and then use "normal" copy-prop to convert that into a
* single vector?
*
* In general, the answer is yes, but there is a special case which necessitates
* the use of this transformation: non-uniform control flow. Copy-prop can act
* across some control flow, and in cases like the following:
*
* 2: 123
* 3: var.x = @2
* 4: if (...)
* 5: 456
* 6: var.y = @5
* 7: load(var.xy)
*
* we can copy-prop the load (@7) into a constant vector {123, 456}, but we
* cannot easily vectorize the stores @3 and @6.
*/
enum copy_propagation_value_state
{
VALUE_STATE_NOT_WRITTEN = 0,
@ -772,6 +818,42 @@ static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx,
return true;
}
/*
 * Try to turn a load into a single constant node.
 *
 * For every component in the range covered by the load's deref, the tracked
 * copy-propagation value is looked up. If each of them refers to a component
 * of an HLSL_IR_CONSTANT node, the individual values are gathered into a new
 * constant with the load's data type and location, which is inserted right
 * before the load and then passed to hlsl_replace_node() together with it.
 *
 * Returns true if the load was replaced. Returns false, leaving the load
 * untouched, when the component range cannot be determined, when a component
 * has no tracked value or is not backed by a constant, or when the new
 * constant cannot be created.
 *
 * NOTE(review): "gathered" holds 4 entries, so this presumably relies on the
 * caller only passing loads that cover at most 4 components -- confirm.
 */
static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx,
        const struct copy_propagation_state *state, struct hlsl_ir_load *load)
{
    struct hlsl_ir_node *load_node = &load->node;
    const struct hlsl_ir_var *src_var = load->src.var;
    union hlsl_constant_value gathered[4] = {0};
    struct hlsl_ir_constant *folded;
    unsigned int first, n, idx;

    if (!hlsl_component_index_range_from_deref(ctx, &load->src, &first, &n))
        return false;

    /* Collect the constant feeding each loaded component; give up on the
     * first one that is unknown or not a constant. */
    for (idx = 0; idx < n; ++idx)
    {
        struct copy_propagation_value *src = copy_propagation_get_value(state, src_var, first + idx);

        if (!src)
            return false;
        if (src->node->type != HLSL_IR_CONSTANT)
            return false;

        gathered[idx] = hlsl_ir_constant(src->node)->value[src->component];
    }

    folded = hlsl_new_constant(ctx, load_node->data_type, &load_node->loc);
    if (!folded)
        return false;

    /* All four slots are copied; the ones past the loaded range keep the
     * zero value they were initialised with. */
    for (idx = 0; idx < 4; ++idx)
        folded->value[idx] = gathered[idx];

    list_add_before(&load_node->entry, &folded->node.entry);

    TRACE("Load from %s[%u-%u] turned into a constant %p.\n", src_var->name, first, first + n, folded);

    hlsl_replace_node(load_node, &folded->node);
    return true;
}
static bool copy_propagation_transform_load(struct hlsl_ctx *ctx,
struct hlsl_ir_load *load, struct copy_propagation_state *state)
{
@ -792,6 +874,9 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx,
return false;
}
if (copy_propagation_replace_with_constant_vector(ctx, state, load))
return true;
if (copy_propagation_replace_with_single_instr(ctx, state, load))
return true;

View File

@ -29,7 +29,7 @@ draw quad
probe all rgba (0.2, 0.2, 0.2, 0.1)
[pixel shader todo]
[pixel shader]
Texture2D tex;
struct foo
@ -48,11 +48,11 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (31.1, 41.1, 51.1, 61.1) 1
draw quad
probe all rgba (31.1, 41.1, 51.1, 61.1) 1
[pixel shader todo]
[pixel shader]
Texture2D tex1;
Texture2D tex2;

View File

@ -132,7 +132,7 @@ float4 main() : sv_target
}
[pixel shader todo]
[pixel shader]
Texture2D tex;
uniform float f;
@ -153,8 +153,8 @@ float4 main() : sv_target
[test]
uniform 0 float 10.0
todo draw quad
todo probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0)
draw quad
probe (0, 0) rgba (11.0, 12.0, 13.0, 11.0)
[pixel shader fail]

View File

@ -12,7 +12,7 @@ size (3, 3)
0.0 0.2 0.0 0.4 0.1 0.2 0.5 0.0 0.2 0.2 0.0 0.4
[pixel shader todo]
[pixel shader]
sampler s;
Texture2D t;
@ -22,11 +22,11 @@ float4 main() : sv_target
}
[test]
todo draw quad
draw quad
probe all rgba (0.1, 0.2, 0.5, 0.0)
[pixel shader todo]
[pixel shader]
sampler s;
Texture2D t;
@ -36,11 +36,11 @@ float4 main() : sv_target
}
[test]
todo draw quad
draw quad
probe all rgba (0.2, 0.2, 0.0, 0.4)
[pixel shader todo]
[pixel shader]
sampler s;
Texture2D t;
@ -50,5 +50,5 @@ float4 main() : sv_target
}
[test]
todo draw quad
draw quad
probe all rgba (0.0, 0.2, 0.0, 0.4)

View File

@ -8,7 +8,7 @@ size (3, 3)
0 2 0 1 1 2 0 1 2 2 0 1
[pixel shader todo]
[pixel shader]
Texture2D t;
float4 main(float4 pos : sv_position) : sv_target
@ -18,14 +18,14 @@ float4 main(float4 pos : sv_position) : sv_target
[test]
todo draw quad
todo probe (0, 0) rgba (0, 1, 0, 1)
todo probe (1, 0) rgba (1, 1, 0, 1)
todo probe (0, 1) rgba (0, 2, 0, 1)
todo probe (1, 1) rgba (1, 2, 0, 1)
draw quad
probe (0, 0) rgba (0, 1, 0, 1)
probe (1, 0) rgba (1, 1, 0, 1)
probe (0, 1) rgba (0, 2, 0, 1)
probe (1, 1) rgba (1, 2, 0, 1)
[pixel shader todo]
[pixel shader]
Texture2D t;
float4 main(float4 pos : sv_position) : sv_target
@ -35,11 +35,11 @@ float4 main(float4 pos : sv_position) : sv_target
[test]
todo draw quad
todo probe (3, 0) rgba (1, 0, 0, 1)
todo probe (4, 0) rgba (2, 0, 0, 1)
todo probe (3, 1) rgba (1, 1, 0, 1)
todo probe (4, 1) rgba (2, 1, 0, 1)
draw quad
probe (3, 0) rgba (1, 0, 0, 1)
probe (4, 0) rgba (2, 0, 0, 1)
probe (3, 1) rgba (1, 1, 0, 1)
probe (4, 1) rgba (2, 1, 0, 1)
[pixel shader fail]