vkd3d-shader/hlsl: Implement the InterlockedAdd() intrinsic.

This commit is contained in:
Shaun Ren 2024-12-12 17:59:15 -05:00 committed by Henri Verbeet
parent 5f943e9110
commit 660a71ef22
Notes: Henri Verbeet 2025-01-20 16:18:51 +01:00
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Elizabeth Figura (@zfigura)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1330
5 changed files with 309 additions and 0 deletions

View File

@ -2031,6 +2031,25 @@ struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const ch
return &constant->node;
}
/* Allocate an HLSL_IR_INTERLOCKED node that applies "op" to the element of
 * "dst" addressed by "coords".
 *
 * "type" becomes the data type of the node. "dst" is copied into the node, so
 * the caller keeps ownership of its own deref. "coords", "cmp_value" and
 * "value" are attached as sources of the node; callers in this file pass a
 * NULL "cmp_value" for operations that have no comparison operand.
 *
 * Returns the new node, or NULL on failure. */
struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type,
        const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value,
        struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc)
{
    struct hlsl_ir_interlocked *interlocked;

    if (!(interlocked = hlsl_alloc(ctx, sizeof(*interlocked))))
        return NULL;

    init_node(&interlocked->node, HLSL_IR_INTERLOCKED, type, loc);
    interlocked->op = op;

    /* Do not hand out a node whose destination could not be copied; this
     * mirrors the clone_deref() handling in clone_interlocked().
     * NOTE(review): relies on hlsl_copy_deref() returning false on failure;
     * confirm against its declaration. */
    if (!hlsl_copy_deref(ctx, &interlocked->dst, dst))
    {
        vkd3d_free(interlocked);
        return NULL;
    }

    hlsl_src_from_node(&interlocked->coords, coords);
    hlsl_src_from_node(&interlocked->cmp_value, cmp_value);
    hlsl_src_from_node(&interlocked->value, value);

    return &interlocked->node;
}
bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
{
struct hlsl_type *type = index->val.node->data_type;
@ -2375,6 +2394,27 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr
return dst;
}
/* Create a copy of the interlocked instruction "src", with its sources
 * remapped through "map".
 *
 * Returns the cloned node, or NULL if allocation or cloning the destination
 * deref fails. */
static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx,
        struct clone_instr_map *map, struct hlsl_ir_interlocked *src)
{
    struct hlsl_ir_interlocked *dst;

    if (!(dst = hlsl_alloc(ctx, sizeof(*dst))))
        return NULL;

    /* The data type of an interlocked node encodes whether the original value
     * is returned, so the clone has to carry the same type as its source
     * instead of unconditionally dropping it. */
    init_node(&dst->node, HLSL_IR_INTERLOCKED, src->node.data_type, &src->node.loc);
    dst->op = src->op;

    if (!clone_deref(ctx, map, &dst->dst, &src->dst))
    {
        vkd3d_free(dst);
        return NULL;
    }

    clone_src(map, &dst->coords, &src->coords);
    clone_src(map, &dst->cmp_value, &src->cmp_value);
    clone_src(map, &dst->value, &src->value);

    return &dst->node;
}
static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx,
struct clone_instr_map *map, struct hlsl_ir_compile *compile)
{
@ -2575,6 +2615,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
case HLSL_IR_SWIZZLE:
return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
case HLSL_IR_INTERLOCKED:
return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr));
case HLSL_IR_COMPILE:
return clone_compile(ctx, map, hlsl_ir_compile(instr));
@ -3013,6 +3056,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type)
[HLSL_IR_STORE ] = "HLSL_IR_STORE",
[HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH",
[HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE",
[HLSL_IR_INTERLOCKED ] = "HLSL_IR_INTERLOCKED",
[HLSL_IR_COMPILE] = "HLSL_IR_COMPILE",
[HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE",
@ -3458,6 +3502,23 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_
vkd3d_string_buffer_printf(buffer, "]");
}
/* Append a textual form of an interlocked instruction to "buffer":
 *
 *     interlocked_<op>(dst = <deref>, coords = <src>[, cmp_value = <src>], value = <src>)
 *
 * The "cmp_value" operand is only printed when the instruction has one. */
static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_interlocked *interlocked)
{
    static const char *const op_names[] =
    {
        [HLSL_INTERLOCKED_ADD] = "add",
    };

    /* Every operation must have a name in the table above. */
    VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names));

    vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]);
    dump_deref(buffer, &interlocked->dst);
    vkd3d_string_buffer_printf(buffer, ", coords = ");
    dump_src(buffer, &interlocked->coords);
    if (interlocked->cmp_value.node)
    {
        /* Without this the dump would silently omit one of the operands. */
        vkd3d_string_buffer_printf(buffer, ", cmp_value = ");
        dump_src(buffer, &interlocked->cmp_value);
    }
    vkd3d_string_buffer_printf(buffer, ", value = ");
    dump_src(buffer, &interlocked->value);
    vkd3d_string_buffer_printf(buffer, ")");
}
static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
const struct hlsl_ir_compile *compile)
{
@ -3591,6 +3652,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr));
break;
case HLSL_IR_INTERLOCKED:
dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr));
break;
case HLSL_IR_COMPILE:
dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr));
break;
@ -3819,6 +3884,15 @@ static void free_ir_index(struct hlsl_ir_index *index)
vkd3d_free(index);
}
/* Release everything held by an interlocked instruction, then the
 * instruction itself. */
static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked)
{
    struct hlsl_src *const operands[] =
    {
        &interlocked->coords,
        &interlocked->cmp_value,
        &interlocked->value,
    };
    unsigned int i;

    hlsl_cleanup_deref(&interlocked->dst);

    /* Detach the operands in declaration order. */
    for (i = 0; i < ARRAY_SIZE(operands); ++i)
        hlsl_src_remove(operands[i]);

    vkd3d_free(interlocked);
}
static void free_ir_compile(struct hlsl_ir_compile *compile)
{
unsigned int i;
@ -3905,6 +3979,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
free_ir_switch(hlsl_ir_switch(node));
break;
case HLSL_IR_INTERLOCKED:
free_ir_interlocked(hlsl_ir_interlocked(node));
break;
case HLSL_IR_COMPILE:
free_ir_compile(hlsl_ir_compile(node));
break;

View File

@ -319,6 +319,7 @@ enum hlsl_ir_node_type
HLSL_IR_STORE,
HLSL_IR_SWIZZLE,
HLSL_IR_SWITCH,
HLSL_IR_INTERLOCKED,
HLSL_IR_COMPILE,
HLSL_IR_SAMPLER_STATE,
@ -950,6 +951,25 @@ struct hlsl_ir_stateblock_constant
char *name;
};
/* Operation performed by a struct hlsl_ir_interlocked instruction. */
enum hlsl_interlocked_op
{
    /* Produced for the InterlockedAdd() intrinsic. */
    HLSL_INTERLOCKED_ADD,
};
/* An interlocked operation on an element of a resource.
 *
 * node.data_type records whether the value the destination held before the
 * operation is returned: it is NULL when that value is not returned, and the
 * type of that original value when it is. */
struct hlsl_ir_interlocked
{
    struct hlsl_ir_node node;
    /* Which operation to perform. */
    enum hlsl_interlocked_op op;
    /* The resource being modified. */
    struct hlsl_deref dst;
    /* Coordinates of the modified element within "dst". */
    struct hlsl_src coords;
    /* Comparison operand; its node is NULL for operations that have none. */
    struct hlsl_src cmp_value;
    /* Value operand of the operation. */
    struct hlsl_src value;
};
struct hlsl_scope
{
/* Item entry for hlsl_ctx.scopes. */
@ -1247,6 +1267,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n
return CONTAINING_RECORD(node, struct hlsl_ir_switch, node);
}
/* Downcast a generic IR node to the interlocked instruction containing it.
 * The node must be of type HLSL_IR_INTERLOCKED. */
static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ir_node *node)
{
    struct hlsl_ir_interlocked *interlocked;

    VKD3D_ASSERT(node->type == HLSL_IR_INTERLOCKED);
    interlocked = CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node);

    return interlocked;
}
static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node)
{
VKD3D_ASSERT(node->type == HLSL_IR_COMPILE);
@ -1554,6 +1580,9 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty
struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val,
struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type,
const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value,
struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc);
struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter,
struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type,
unsigned int unroll_limit, const struct vkd3d_shader_location *loc);

View File

@ -667,6 +667,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
case HLSL_IR_RESOURCE_LOAD:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
case HLSL_IR_INTERLOCKED:
case HLSL_IR_STATEBLOCK_CONSTANT:
hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
"Expected literal expression.");
@ -5374,6 +5375,109 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx,
return true;
}
/* Shared implementation of the Interlocked*() intrinsics.
 *
 * params->args[0] is the destination, params->args[1] the value operand, and
 * the optional params->args[2] an output argument receiving the value the
 * destination held before the operation. "name" is the intrinsic's name and
 * is only used in diagnostics.
 *
 * On success an interlocked instruction, an assignment to the output
 * argument when there is one, and a void expression are appended to
 * params->instrs.
 *
 * Returns true on success and false on failure. */
static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op,
        const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name)
{
    struct hlsl_ir_node *lhs, *coords, *val, *orig_val = NULL;
    struct hlsl_ir_node *interlocked, *void_ret;
    struct hlsl_type *lhs_type, *val_type;
    struct vkd3d_string_buffer *string;
    struct hlsl_deref dst_deref;

    /* The profile error is reported, but parsing carries on. */
    if (hlsl_version_lt(ctx, 5, 0))
        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
                "Interlocked functions can only be used in shader model 5.0 or higher.");

    if (params->args_count != 2 && params->args_count != 3)
    {
        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
                "Unexpected number of arguments to function '%s': expected 2 or 3, but got %u.",
                name, params->args_count);
        return false;
    }

    lhs = params->args[0];
    lhs_type = lhs->data_type;

    if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT
            && lhs_type->e.numeric.type != HLSL_TYPE_INT))
    {
        if ((string = hlsl_type_to_string(ctx, lhs_type)))
        {
            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
                    "Unexpected type for argument 0 of '%s': expected 'uint' or 'int', but got '%s'.",
                    name, string->buffer);
            hlsl_release_string_buffer(ctx, string);
        }
        return false;
    }

    /* Interlocked*() functions always take uint for the value parameters,
     * except for InterlockedMax()/InterlockedMin(). */
    val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT);

    if (!(val = add_implicit_conversion(ctx, params->instrs, params->args[1], val_type, loc)))
        return false;

    if (params->args_count == 3)
        orig_val = params->args[2];

    /* TODO: groupshared variables */
    if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs)))
    {
        if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs)))
        {
            hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets.");
            return false;
        }

        if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node))
            return false;

        coords = hlsl_ir_index(lhs)->idx.node;

        VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR);
        VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT);

        if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV)
        {
            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements.");
            /* dst_deref was initialised above and nothing else owns it, so it
             * has to be released on this error path as well. */
            hlsl_cleanup_deref(&dst_deref);
            return false;
        }
    }
    else
    {
        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements.");
        return false;
    }

    /* The node only gets a data type when the caller asked for the original
     * value. The local deref is released straight after the call, whether or
     * not the node was created. */
    interlocked = hlsl_new_interlocked(ctx, op, orig_val ? lhs_type : NULL, &dst_deref, coords, NULL, val, loc);
    hlsl_cleanup_deref(&dst_deref);
    if (!interlocked)
        return false;
    hlsl_block_add_instr(params->instrs, interlocked);

    if (orig_val)
    {
        if (orig_val->data_type->modifiers & HLSL_MODIFIER_CONST)
            hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST,
                    "Output argument to '%s' is const.", name);

        if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked))
            return false;
    }

    /* The intrinsic call itself evaluates to void. */
    if (!(void_ret = hlsl_new_void_expr(ctx, loc)))
        return false;
    hlsl_block_add_instr(params->instrs, void_ret);

    return true;
}
/* Handler for the InterlockedAdd() intrinsic; forwards to the shared
 * interlocked implementation with the add operation. */
static bool intrinsic_InterlockedAdd(struct hlsl_ctx *ctx,
        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
    static const char intrinsic_name[] = "InterlockedAdd";

    return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_ADD, params, loc, intrinsic_name);
}
static const struct intrinsic_function
{
const char *name;
@ -5387,6 +5491,7 @@ intrinsic_functions[] =
/* Note: these entries should be kept in alphabetical order. */
{"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4},
{"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount},
{"InterlockedAdd", -1, true, intrinsic_InterlockedAdd},
{"abs", 1, true, intrinsic_abs},
{"acos", 1, true, intrinsic_acos},
{"all", 1, true, intrinsic_all},

View File

@ -739,6 +739,10 @@ static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr);
return res;
case HLSL_IR_INTERLOCKED:
res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr);
return res;
default:
return false;
}
@ -1836,6 +1840,15 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx,
return progress;
}
static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx,
struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state)
{
bool progress = false;
progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index);
return progress;
}
static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
struct copy_propagation_state *state)
{
@ -2042,6 +2055,9 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state);
break;
case HLSL_IR_INTERLOCKED:
progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state);
default:
break;
}
@ -2225,6 +2241,24 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
validate_component_index_range_from_deref(ctx, &store->lhs);
break;
}
case HLSL_IR_INTERLOCKED:
{
struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
if (!interlocked->dst.var->is_uniform)
{
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
"Accessed resource must have a single uniform source.");
}
else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT)
{
hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
"Accessed resource from \"%s\" must be determinable at compile time.",
interlocked->dst.var->name);
note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource");
}
break;
}
default:
break;
}
@ -4478,6 +4512,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
case HLSL_IR_LOOP:
case HLSL_IR_RESOURCE_STORE:
case HLSL_IR_SWITCH:
case HLSL_IR_INTERLOCKED:
break;
case HLSL_IR_STATEBLOCK_CONSTANT:
/* Stateblock constants should not appear in the shader program. */
@ -4724,6 +4759,19 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
index->idx.node->last_read = last_read;
break;
}
case HLSL_IR_INTERLOCKED:
{
struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
var = interlocked->dst.var;
var->last_read = max(var->last_read, last_read);
deref_mark_last_read(&interlocked->dst, last_read);
interlocked->coords.node->last_read = last_read;
interlocked->value.node->last_read = last_read;
if (interlocked->cmp_value.node)
interlocked->cmp_value.node->last_read = last_read;
break;
}
case HLSL_IR_JUMP:
{
struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
@ -5135,6 +5183,10 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource);
break;
case HLSL_IR_INTERLOCKED:
register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst);
break;
default:
break;
}
@ -9942,6 +9994,45 @@ static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
}
}
/* Emit the vsir instruction for an interlocked HLSL instruction.
 *
 * When a register has been allocated for the node, the IMM_ATOMIC_* opcode is
 * used; it takes an extra leading destination that is set up from the node
 * itself. Otherwise the plain ATOMIC_* opcode is used with a single
 * destination. In both forms the last destination is built from the
 * instruction's "dst" deref, and the two sources are the coordinates and the
 * value operand.
 *
 * Returns false if the instruction or its destination could not be created. */
static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx,
        struct vsir_program *program, struct hlsl_ir_interlocked *interlocked)
{
    static const enum vkd3d_shader_opcode opcodes[] =
    {
        [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD,
    };

    static const enum vkd3d_shader_opcode imm_opcodes[] =
    {
        [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD,
    };

    struct hlsl_ir_node *node = &interlocked->node;
    bool imm_form = node->reg.allocated;
    struct vkd3d_shader_dst_param *target;
    struct vkd3d_shader_instruction *ins;
    enum vkd3d_shader_opcode opcode;

    if (imm_form)
        opcode = imm_opcodes[interlocked->op];
    else
        opcode = opcodes[interlocked->op];

    ins = generate_vsir_add_program_instruction(ctx, program, &node->loc, opcode, imm_form ? 2 : 1, 2);
    if (!ins)
        return false;

    if (imm_form)
    {
        /* dst[0] is set up from the node; the deref destination follows it. */
        vsir_dst_from_hlsl_node(&ins->dst[0], ctx, node);
        target = &ins->dst[1];
    }
    else
    {
        target = &ins->dst[0];
    }

    if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, target, &interlocked->dst, &node->loc, 0))
        return false;
    target->reg.dimension = VSIR_DIMENSION_NONE;

    vsir_src_from_hlsl_node(&ins->src[0], ctx, interlocked->coords.node, VKD3DSP_WRITEMASK_ALL);
    vsir_src_from_hlsl_node(&ins->src[1], ctx, interlocked->value.node, VKD3DSP_WRITEMASK_ALL);

    return true;
}
static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
struct vsir_program *program, const struct hlsl_ir_jump *jump)
{
@ -10119,6 +10210,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo
generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
break;
case HLSL_IR_INTERLOCKED:
sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr));
break;
default:
break;
}

View File

@ -4026,6 +4026,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
break;
case VKD3DSIH_ADD:
case VKD3DSIH_ATOMIC_IADD:
case VKD3DSIH_AND:
case VKD3DSIH_BREAK:
case VKD3DSIH_CASE:
@ -4068,6 +4069,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
case VKD3DSIH_IMAD:
case VKD3DSIH_IMAX:
case VKD3DSIH_IMIN:
case VKD3DSIH_IMM_ATOMIC_IADD:
case VKD3DSIH_IMUL:
case VKD3DSIH_INE:
case VKD3DSIH_INEG: