vkd3d-shader/hlsl: Inline function calls.

This commit is contained in:
Zebediah Figura 2021-09-11 16:56:04 -05:00 committed by Alexandre Julliard
parent 6177cea31f
commit 4b944517b7
Notes: Alexandre Julliard 2023-02-13 22:20:34 +01:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Francisco Casas (@fcasas)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/85
12 changed files with 383 additions and 66 deletions

View File

@ -1204,6 +1204,295 @@ struct hlsl_ir_loop *hlsl_new_loop(struct hlsl_ctx *ctx, struct vkd3d_shader_loc
return loop;
}
/* Lookup table filled in while a block is being cloned. Each entry pairs an
 * instruction of the source block with the copy that was created for it, so
 * that map_instr() can redirect the operands of later copies. */
struct clone_instr_map
{
struct
{
/* Instruction belonging to the block being cloned. */
const struct hlsl_ir_node *src;
/* The clone that was created for "src". */
struct hlsl_ir_node *dst;
} *instrs;
/* "count" is the number of entries filled in so far; "capacity" is the
 * bookkeeping value handed to vkd3d_array_reserve() when growing "instrs". */
size_t count, capacity;
};
/* Forward declaration: clone_block() calls clone_instr(), and clone_instr()
 * reaches clone_block() again through clone_if() and clone_loop(). */
static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
struct clone_instr_map *map, const struct hlsl_ir_node *instr);
static bool clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block,
const struct hlsl_block *src_block, struct clone_instr_map *map)
{
const struct hlsl_ir_node *src;
struct hlsl_ir_node *dst;
LIST_FOR_EACH_ENTRY(src, &src_block->instrs, struct hlsl_ir_node, entry)
{
if (!(dst = clone_instr(ctx, map, src)))
{
hlsl_free_instr_list(&dst_block->instrs);
return false;
}
list_add_tail(&dst_block->instrs, &dst->entry);
if (!list_empty(&src->uses))
{
if (!vkd3d_array_reserve((void **)&map->instrs, &map->capacity, map->count + 1, sizeof(*map->instrs)))
{
hlsl_free_instr_list(&dst_block->instrs);
return false;
}
map->instrs[map->count].dst = dst;
map->instrs[map->count].src = src;
++map->count;
}
}
return true;
}
/* Translate a source instruction into its clone by scanning "map" linearly.
 *
 * A NULL "src" maps to NULL. A non-NULL "src" that has no entry in the map
 * ends in vkd3d_unreachable(). */
static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct hlsl_ir_node *src)
{
    size_t idx = 0;

    if (src == NULL)
        return NULL;

    while (idx < map->count)
    {
        if (map->instrs[idx].src == src)
            return map->instrs[idx].dst;
        ++idx;
    }

    /* Not finding an entry means the block given to hlsl_clone_block()
     * referenced an instruction outside of itself, which it should not. */
    vkd3d_unreachable();
}
/* Initialise "dst" as a copy of the dereference "src": same variable, same
 * path length, with every path node replaced by its clone from "map".
 *
 * Asserts that "src" has no offset node. Returns false if init_deref() fails. */
static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map,
        struct hlsl_deref *dst, const struct hlsl_deref *src)
{
    unsigned int idx;

    assert(!src->offset.node);

    if (init_deref(ctx, dst, src->var, src->path_len))
    {
        for (idx = 0; idx < src->path_len; ++idx)
            hlsl_src_from_node(&dst->path[idx], map_instr(map, src->path[idx].node));
        return true;
    }

    return false;
}
/* Point "dst" at the clone of the node that "src" refers to. */
static void clone_src(struct clone_instr_map *map, struct hlsl_src *dst, const struct hlsl_src *src)
{
    struct hlsl_ir_node *mapped = map_instr(map, src->node);

    hlsl_src_from_node(dst, mapped);
}
/* Create a new call instruction with the same function declaration and
 * source location as "src". */
static struct hlsl_ir_node *clone_call(struct hlsl_ctx *ctx, struct hlsl_ir_call *src)
{
    struct hlsl_ir_node *copy;

    copy = hlsl_new_call(ctx, src->decl, &src->node.loc);
    return copy;
}
/* Create a new constant of the same data type and location as "src" and copy
 * the whole value array into it. Returns NULL if the constant cannot be
 * created. */
static struct hlsl_ir_node *clone_constant(struct hlsl_ctx *ctx, struct hlsl_ir_constant *src)
{
    struct hlsl_ir_constant *copy = hlsl_new_constant(ctx, src->node.data_type, &src->node.loc);

    if (copy == NULL)
        return NULL;

    memcpy(copy->value, src->value, sizeof(src->value));
    return &copy->node;
}
/* Create a new expression with the same operation, data type and location as
 * "src", using the clones of its operands. All HLSL_MAX_OPERANDS slots are
 * translated; map_instr() turns NULL operands into NULL. */
static struct hlsl_ir_node *clone_expr(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_expr *src)
{
    struct hlsl_ir_node *args[HLSL_MAX_OPERANDS];
    unsigned int n = 0;

    while (n < ARRAY_SIZE(args))
    {
        args[n] = map_instr(map, src->operands[n].node);
        ++n;
    }

    return hlsl_new_expr(ctx, src->op, args, src->node.data_type, &src->node.loc);
}
/* Create a new "if" instruction on the cloned condition, then clone the
 * "then" and "else" blocks into it. If either block fails to clone, the new
 * instruction is released with hlsl_free_instr() and NULL is returned. */
static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_if *src)
{
    struct hlsl_ir_node *cond = map_instr(map, src->condition.node);
    struct hlsl_ir_if *copy;

    if (!(copy = hlsl_new_if(ctx, cond, src->node.loc)))
        return NULL;

    /* The "else" block is only attempted once the "then" block succeeded. */
    if (clone_block(ctx, &copy->then_instrs, &src->then_instrs, map)
            && clone_block(ctx, &copy->else_instrs, &src->else_instrs, map))
        return &copy->node;

    hlsl_free_instr(&copy->node);
    return NULL;
}
/* Create a new jump of the same type and location as "src". Returns NULL if
 * hlsl_new_jump() fails. */
static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src)
{
    struct hlsl_ir_jump *copy = hlsl_new_jump(ctx, src->type, src->node.loc);

    return copy ? &copy->node : NULL;
}
/* Allocate a load instruction with the data type and location of "src" and
 * clone its source dereference. Returns NULL on allocation failure or if the
 * dereference cannot be cloned. */
static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src)
{
    struct hlsl_ir_load *copy = hlsl_alloc(ctx, sizeof(*copy));

    if (!copy)
        return NULL;

    init_node(&copy->node, HLSL_IR_LOAD, src->node.data_type, &src->node.loc);
    if (clone_deref(ctx, map, &copy->src, &src->src))
        return &copy->node;

    vkd3d_free(copy);
    return NULL;
}
/* Create a new loop at the location of "src" and clone the loop body into it.
 * If the body fails to clone, the new loop is released with hlsl_free_instr()
 * and NULL is returned. */
static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_loop *src)
{
    struct hlsl_ir_loop *copy = hlsl_new_loop(ctx, src->node.loc);

    if (!copy)
        return NULL;

    if (clone_block(ctx, &copy->body, &src->body, map))
        return &copy->node;

    hlsl_free_instr(&copy->node);
    return NULL;
}
/* Allocate a resource load with the data type, location and load type of
 * "src", clone its resource and sampler dereferences, and remap the coords,
 * lod and texel_offset sources.
 *
 * Returns NULL on allocation failure or if either dereference cannot be
 * cloned; what was set up before the failure is undone. */
static struct hlsl_ir_node *clone_resource_load(struct hlsl_ctx *ctx,
        struct clone_instr_map *map, struct hlsl_ir_resource_load *src)
{
    struct hlsl_ir_resource_load *copy;

    copy = hlsl_alloc(ctx, sizeof(*copy));
    if (!copy)
        return NULL;

    init_node(&copy->node, HLSL_IR_RESOURCE_LOAD, src->node.data_type, &src->node.loc);
    copy->load_type = src->load_type;

    if (!clone_deref(ctx, map, &copy->resource, &src->resource))
        goto err_free;
    if (!clone_deref(ctx, map, &copy->sampler, &src->sampler))
        goto err_cleanup_resource;

    clone_src(map, &copy->coords, &src->coords);
    clone_src(map, &copy->lod, &src->lod);
    clone_src(map, &copy->texel_offset, &src->texel_offset);
    return &copy->node;

err_cleanup_resource:
    /* The resource dereference was already initialised; tear it down. */
    hlsl_cleanup_deref(&copy->resource);
err_free:
    vkd3d_free(copy);
    return NULL;
}
/* Allocate a resource store (initialised with a NULL data type) at the
 * location of "src", clone its resource dereference, and remap the coords and
 * value sources. Returns NULL on allocation failure or if the dereference
 * cannot be cloned. */
static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx,
        struct clone_instr_map *map, struct hlsl_ir_resource_store *src)
{
    struct hlsl_ir_resource_store *copy = hlsl_alloc(ctx, sizeof(*copy));

    if (!copy)
        return NULL;

    init_node(&copy->node, HLSL_IR_RESOURCE_STORE, NULL, &src->node.loc);
    if (clone_deref(ctx, map, &copy->resource, &src->resource))
    {
        clone_src(map, &copy->coords, &src->coords);
        clone_src(map, &copy->value, &src->value);
        return &copy->node;
    }

    vkd3d_free(copy);
    return NULL;
}
/* Allocate a store (initialised with a NULL data type) at the location of
 * "src", clone the left-hand-side dereference, remap the right-hand-side
 * source and copy the writemask. Returns NULL on allocation failure or if the
 * dereference cannot be cloned. */
static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_store *src)
{
    struct hlsl_ir_store *copy = hlsl_alloc(ctx, sizeof(*copy));

    if (!copy)
        return NULL;

    init_node(&copy->node, HLSL_IR_STORE, NULL, &src->node.loc);
    if (clone_deref(ctx, map, &copy->lhs, &src->lhs))
    {
        clone_src(map, &copy->rhs, &src->rhs);
        copy->writemask = src->writemask;
        return &copy->node;
    }

    vkd3d_free(copy);
    return NULL;
}
/* Create a new swizzle with the same swizzle value, the component count taken
 * from the data type of "src" (dimx), and the clone of the swizzled value.
 * Returns NULL if hlsl_new_swizzle() fails. */
static struct hlsl_ir_node *clone_swizzle(struct hlsl_ctx *ctx,
        struct clone_instr_map *map, struct hlsl_ir_swizzle *src)
{
    struct hlsl_ir_node *val = map_instr(map, src->val.node);
    struct hlsl_ir_swizzle *copy;

    copy = hlsl_new_swizzle(ctx, src->swizzle, src->node.data_type->dimx, val, &src->node.loc);
    return copy ? &copy->node : NULL;
}
/* Clone a single instruction by dispatching on its type to the matching
 * clone_*() helper. Returns the new instruction, or NULL if the helper fails.
 * An instruction type without a case below ends in vkd3d_unreachable(). */
static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
        struct clone_instr_map *map, const struct hlsl_ir_node *instr)
{
    switch (instr->type)
    {
        /* Instructions cloned without consulting the map. */
        case HLSL_IR_CALL:
            return clone_call(ctx, hlsl_ir_call(instr));

        case HLSL_IR_CONSTANT:
            return clone_constant(ctx, hlsl_ir_constant(instr));

        case HLSL_IR_JUMP:
            return clone_jump(ctx, hlsl_ir_jump(instr));

        /* Instructions containing nested blocks. */
        case HLSL_IR_IF:
            return clone_if(ctx, map, hlsl_ir_if(instr));

        case HLSL_IR_LOOP:
            return clone_loop(ctx, map, hlsl_ir_loop(instr));

        /* Instructions whose operands are remapped through the map. */
        case HLSL_IR_EXPR:
            return clone_expr(ctx, map, hlsl_ir_expr(instr));

        case HLSL_IR_LOAD:
            return clone_load(ctx, map, hlsl_ir_load(instr));

        case HLSL_IR_RESOURCE_LOAD:
            return clone_resource_load(ctx, map, hlsl_ir_resource_load(instr));

        case HLSL_IR_RESOURCE_STORE:
            return clone_resource_store(ctx, map, hlsl_ir_resource_store(instr));

        case HLSL_IR_STORE:
            return clone_store(ctx, map, hlsl_ir_store(instr));

        case HLSL_IR_SWIZZLE:
            return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
    }

    vkd3d_unreachable();
}
/* Clone the instructions of "src_block" onto the tail of "dst_block".
 *
 * A temporary source-to-clone map is used for the duration of the call and
 * freed before returning, whether or not cloning succeeded. Returns the
 * result of clone_block(). */
bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block)
{
    struct clone_instr_map map = {0};
    const bool cloned = clone_block(ctx, dst_block, src_block, &map);

    vkd3d_free(map.instrs);
    return cloned;
}
struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx,
struct hlsl_type *return_type, const struct hlsl_func_parameters *parameters,
const struct hlsl_semantic *semantic, const struct vkd3d_shader_location *loc)

View File

@ -975,6 +975,8 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type);
void hlsl_add_function(struct hlsl_ctx *ctx, char *name, struct hlsl_ir_function_decl *decl);
bool hlsl_add_var(struct hlsl_ctx *ctx, struct hlsl_ir_var *decl, bool local_var);
bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block);
void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func);
int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,

View File

@ -678,6 +678,32 @@ static void lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun
}
}
/* Remove HLSL_IR_CALL instructions by inlining them. */
static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
const struct hlsl_ir_function_decl *decl;
struct hlsl_ir_call *call;
struct hlsl_block block;
if (instr->type != HLSL_IR_CALL)
return false;
call = hlsl_ir_call(instr);
decl = call->decl;
if (!decl->has_body)
hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
"Function \"%s\" is not defined.", decl->func->name);
list_init(&block.instrs);
if (!hlsl_clone_block(ctx, &block, &decl->body))
return false;
list_move_before(&call->node.entry, &block.instrs);
list_remove(&call->node.entry);
hlsl_free_instr(&call->node);
return true;
}
/* Lower casts from vec1 to vecN to swizzles. */
static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
{
@ -2198,8 +2224,8 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
switch (instr->type)
{
case HLSL_IR_CALL:
FIXME("We should have inlined all calls before computing liveness.\n");
break;
/* We should have inlined all calls before computing liveness. */
vkd3d_unreachable();
case HLSL_IR_STORE:
{
@ -3137,6 +3163,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry
lower_return(ctx, entry_func, body, false);
while (transform_ir(ctx, lower_calls, body, NULL));
LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry)
{
if (var->storage_modifiers & HLSL_STORAGE_UNIFORM)

View File

@ -895,8 +895,7 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b
switch (instr->type)
{
case HLSL_IR_CALL:
hlsl_fixme(ctx, &instr->loc, "Inline call instructions.\n");
break;
vkd3d_unreachable();
case HLSL_IR_CONSTANT:
write_sm1_constant(ctx, buffer, instr);

View File

@ -2361,8 +2361,7 @@ static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *
switch (instr->type)
{
case HLSL_IR_CALL:
hlsl_fixme(ctx, &instr->loc, "Inline call instructions.\n");
break;
vkd3d_unreachable();
case HLSL_IR_CONSTANT:
write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr));

View File

@ -1,6 +1,6 @@
% Test early return from a user-defined function.
[pixel shader todo]
[pixel shader]
float func(out float o)
{
@ -29,10 +29,10 @@ float4 main() : sv_target
}
[test]
todo draw quad
draw quad
probe all rgba (0.2, 0.1, 0.8, 0.5);
[pixel shader todo]
[pixel shader]
uniform float f;
@ -80,19 +80,19 @@ float4 main() : sv_target
[test]
uniform 0 float 0.1
todo draw quad
draw quad
probe all rgba (0.3, 0.2, 0.6, 0.3) 1
uniform 0 float 0.4
todo draw quad
draw quad
probe all rgba (0.6, 0.5, 0.6, 0.3) 1
uniform 0 float 0.6
todo draw quad
draw quad
probe all rgba (0.6, 0.5, 0.4, 0.5) 1
uniform 0 float 0.8
todo draw quad
draw quad
probe all rgba (0.8, 0.7, 0.4, 0.5) 1
[pixel shader todo]
[pixel shader]
uniform float f;
@ -136,13 +136,13 @@ float4 main() : sv_target
[test]
uniform 0 float 0.1
todo draw quad
draw quad
probe all rgba (0.2, 0.1, 0.2, 0.1) 1
uniform 0 float 0.5
todo draw quad
draw quad
probe all rgba (0.5, 0.4, 1.0, 0.9) 1
uniform 0 float 0.9
todo draw quad
draw quad
probe all rgba (1.0, 0.9, 1.0, 0.6) 1
[pixel shader todo]

View File

@ -1,4 +1,4 @@
[pixel shader fail todo]
[pixel shader fail]
float4 func();
@ -121,7 +121,7 @@ void func()
{
}
[pixel shader todo]
[pixel shader]
void func();
@ -179,7 +179,7 @@ float4 main() : sv_target
todo draw quad
todo probe all rgba (0.1, 0.2, 0.3, 0.4)
[pixel shader todo]
[pixel shader]
float func(in float a, out float b, inout float c)
{
@ -200,10 +200,10 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.5, 0.6, 0.7, 0)
draw quad
probe all rgba (0.5, 0.6, 0.7, 0)
[pixel shader todo]
[pixel shader]
void func(in float a, inout float2 b)
{
@ -221,8 +221,8 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.6, 0.1, 0.5, 0)
draw quad
probe all rgba (0.6, 0.1, 0.5, 0)
% Recursion is forbidden.

View File

@ -153,7 +153,7 @@ static int x = 1;
[numthreads((x = 2), 1, 1)]
void main() {}
[compute shader todo]
[compute shader]
static int x = 1;

View File

@ -38,7 +38,7 @@ float4x1 main() : sv_target
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.1)
[pixel shader todo]
[pixel shader]
float3 func()
{
return float3x1(0.4, 0.3, 0.2);
@ -50,10 +50,10 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader todo]
[pixel shader]
float3 func()
{
return float1x3(0.4, 0.3, 0.2);
@ -65,10 +65,10 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader todo]
[pixel shader]
float1x3 func()
{
return float3(0.4, 0.3, 0.2);
@ -80,10 +80,10 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader todo]
[pixel shader]
float3x1 func()
{
return float3(0.4, 0.3, 0.2);
@ -95,8 +95,8 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail]
float3x1 func()
@ -120,7 +120,7 @@ float4 main() : sv_target
return float4(func(), 0.0);
}
[pixel shader todo]
[pixel shader]
float3 func()
{
return float4(0.4, 0.3, 0.2, 0.1);
@ -132,10 +132,10 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader todo]
[pixel shader]
float3 func()
{
return float4x1(0.4, 0.3, 0.2, 0.1);
@ -147,10 +147,10 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader todo]
[pixel shader]
float3 func()
{
return float1x4(0.4, 0.3, 0.2, 0.1);
@ -162,8 +162,8 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail todo]
float3x1 func()
@ -176,7 +176,7 @@ float4 main() : sv_target
return float4(func(), 0.0);
}
[pixel shader todo]
[pixel shader]
float3x1 func()
{
return float4x1(0.4, 0.3, 0.2, 0.1);
@ -188,8 +188,8 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail]
float3x1 func()
@ -202,7 +202,7 @@ float4 main() : sv_target
return float4(func(), 0.0);
}
[pixel shader todo]
[pixel shader]
float1x3 func()
{
return float4(0.4, 0.3, 0.2, 0.1);
@ -214,8 +214,8 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)
[pixel shader fail]
float1x3 func()
@ -228,7 +228,7 @@ float4 main() : sv_target
return float4(func(), 0.0);
}
[pixel shader todo]
[pixel shader]
float1x3 func()
{
return float1x4(0.4, 0.3, 0.2, 0.1);
@ -240,5 +240,5 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.4, 0.3, 0.2, 0.0)
draw quad
probe all rgba (0.4, 0.3, 0.2, 0.0)

View File

@ -1,4 +1,4 @@
[pixel shader todo]
[pixel shader]
float myfunc()
{
return 0.6;
@ -12,8 +12,8 @@ float4 main() : sv_target
}
[test]
todo draw quad
todo probe all rgba (0.8, 0.0, 0.0, 0.0)
draw quad
probe all rgba (0.8, 0.0, 0.0, 0.0)
[pixel shader fail]

View File

@ -1,4 +1,4 @@
[pixel shader todo]
[pixel shader]
void sub2(in uniform float4 i, out float4 o)
{
o = i;
@ -17,5 +17,5 @@ void main(in uniform float4 a, uniform float4 b, out float4 o : sv_target)
[test]
uniform 0 float4 0.1 0.0 0.0 0.0
uniform 4 float4 0.2 0.0 0.0 0.0
todo draw quad
todo probe all rgba (0.1, 0.2, 0.3, 0.4)
draw quad
probe all rgba (0.1, 0.2, 0.3, 0.4)

View File

@ -7,7 +7,7 @@ size (1, 1)
0.1 0.2 0.3 0.4
[compute shader todo]
[compute shader]
RWTexture2D<float4> u;
@ -26,7 +26,7 @@ void main()
}
[test]
todo dispatch 1 1 1
dispatch 1 1 1
probe uav 0 (0, 0) rgba (0.4, 0.1, 0.2, 0.3)
[uav 0]
@ -35,7 +35,7 @@ size (1, 1)
0.1
[compute shader todo]
[compute shader]
RWTexture2D<float> u;
@ -51,5 +51,5 @@ void main()
}
[test]
todo dispatch 1 1 1
dispatch 1 1 1
probe uav 0 (0, 0) r (0.2)