diff --git a/libs/vkd3d-shader/hlsl.c b/libs/vkd3d-shader/hlsl.c
index 23f54d3e..aed5b8c5 100644
--- a/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d-shader/hlsl.c
@@ -2031,6 +2031,29 @@ struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const ch
     return &constant->node;
 }
 
+/* Allocate an HLSL_IR_INTERLOCKED node performing "op" on the element of
+ * "dst" addressed by "coords". "type" becomes the node's data type, and the
+ * node takes its own copy of "dst", so the caller keeps ownership of the
+ * original deref. Returns NULL on allocation failure. */
+struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type,
+        const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value,
+        struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc)
+{
+    struct hlsl_ir_interlocked *interlocked;
+
+    if (!(interlocked = hlsl_alloc(ctx, sizeof(*interlocked))))
+        return NULL;
+
+    init_node(&interlocked->node, HLSL_IR_INTERLOCKED, type, loc);
+    interlocked->op = op;
+    hlsl_copy_deref(ctx, &interlocked->dst, dst);
+    hlsl_src_from_node(&interlocked->coords, coords);
+    hlsl_src_from_node(&interlocked->cmp_value, cmp_value);
+    hlsl_src_from_node(&interlocked->value, value);
+
+    return &interlocked->node;
+}
+
 bool hlsl_index_is_noncontiguous(struct hlsl_ir_index *index)
 {
     struct hlsl_type *type = index->val.node->data_type;
@@ -2375,6 +2398,29 @@ static struct hlsl_ir_node *clone_index(struct hlsl_ctx *ctx, struct clone_instr
     return dst;
 }
 
+static struct hlsl_ir_node *clone_interlocked(struct hlsl_ctx *ctx,
+        struct clone_instr_map *map, struct hlsl_ir_interlocked *src)
+{
+    struct hlsl_ir_interlocked *dst;
+
+    if (!(dst = hlsl_alloc(ctx, sizeof(*dst))))
+        return NULL;
+    /* Carry over the data type; it is non-NULL when the operation returns the
+     * original value. */
+    init_node(&dst->node, HLSL_IR_INTERLOCKED, src->node.data_type, &src->node.loc);
+    dst->op = src->op;
+
+    if (!clone_deref(ctx, map, &dst->dst, &src->dst))
+    {
+        vkd3d_free(dst);
+        return NULL;
+    }
+    clone_src(map, &dst->coords, &src->coords);
+    clone_src(map, &dst->cmp_value, &src->cmp_value);
+    clone_src(map, &dst->value, &src->value);
+    return &dst->node;
+}
+
 static struct hlsl_ir_node *clone_compile(struct hlsl_ctx *ctx,
         struct clone_instr_map *map, struct hlsl_ir_compile *compile)
 {
@@ -2575,6 +2621,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx,
         case HLSL_IR_SWIZZLE:
             return clone_swizzle(ctx, map, hlsl_ir_swizzle(instr));
 
+        case HLSL_IR_INTERLOCKED:
+            return clone_interlocked(ctx, map, hlsl_ir_interlocked(instr));
+
         case HLSL_IR_COMPILE:
             return clone_compile(ctx, map, hlsl_ir_compile(instr));
 
@@ -3013,6 +3062,7 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type)
         [HLSL_IR_STORE          ] = "HLSL_IR_STORE",
         [HLSL_IR_SWITCH         ] = "HLSL_IR_SWITCH",
         [HLSL_IR_SWIZZLE        ] = "HLSL_IR_SWIZZLE",
+        [HLSL_IR_INTERLOCKED    ] = "HLSL_IR_INTERLOCKED",
 
         [HLSL_IR_COMPILE] = "HLSL_IR_COMPILE",
         [HLSL_IR_SAMPLER_STATE] = "HLSL_IR_SAMPLER_STATE",
@@ -3458,6 +3508,23 @@ static void dump_ir_index(struct vkd3d_string_buffer *buffer, const struct hlsl_
     vkd3d_string_buffer_printf(buffer, "]");
 }
 
+static void dump_ir_interlocked(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_interlocked *interlocked)
+{
+    static const char *const op_names[] =
+    {
+        [HLSL_INTERLOCKED_ADD] = "add",
+    };
+
+    VKD3D_ASSERT(interlocked->op < ARRAY_SIZE(op_names));
+    vkd3d_string_buffer_printf(buffer, "interlocked_%s(dst = ", op_names[interlocked->op]);
+    dump_deref(buffer, &interlocked->dst);
+    vkd3d_string_buffer_printf(buffer, ", coords = ");
+    dump_src(buffer, &interlocked->coords);
+    vkd3d_string_buffer_printf(buffer, ", value = ");
+    dump_src(buffer, &interlocked->value);
+    vkd3d_string_buffer_printf(buffer, ")");
+}
+
 static void dump_ir_compile(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
         const struct hlsl_ir_compile *compile)
 {
@@ -3591,6 +3658,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer,
             dump_ir_swizzle(buffer, hlsl_ir_swizzle(instr));
             break;
 
+        case HLSL_IR_INTERLOCKED:
+            dump_ir_interlocked(buffer, hlsl_ir_interlocked(instr));
+            break;
+
         case HLSL_IR_COMPILE:
             dump_ir_compile(ctx, buffer, hlsl_ir_compile(instr));
             break;
@@ -3819,6 +3890,15 @@ static void free_ir_index(struct hlsl_ir_index *index)
     vkd3d_free(index);
 }
 
+static void free_ir_interlocked(struct hlsl_ir_interlocked *interlocked)
+{
+    hlsl_cleanup_deref(&interlocked->dst);
+    hlsl_src_remove(&interlocked->coords);
+    hlsl_src_remove(&interlocked->cmp_value);
+    hlsl_src_remove(&interlocked->value);
+    vkd3d_free(interlocked);
+}
+
 static void free_ir_compile(struct hlsl_ir_compile *compile)
 {
     unsigned int i;
@@ -3905,6 +3985,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node)
             free_ir_switch(hlsl_ir_switch(node));
             break;
 
+        case HLSL_IR_INTERLOCKED:
+            free_ir_interlocked(hlsl_ir_interlocked(node));
+            break;
+
         case HLSL_IR_COMPILE:
             free_ir_compile(hlsl_ir_compile(node));
             break;
diff --git a/libs/vkd3d-shader/hlsl.h b/libs/vkd3d-shader/hlsl.h
index 2d012d95..aea7e82e 100644
--- a/libs/vkd3d-shader/hlsl.h
+++ b/libs/vkd3d-shader/hlsl.h
@@ -319,6 +319,7 @@ enum hlsl_ir_node_type
     HLSL_IR_STORE,
     HLSL_IR_SWIZZLE,
     HLSL_IR_SWITCH,
+    HLSL_IR_INTERLOCKED,
 
     HLSL_IR_COMPILE,
     HLSL_IR_SAMPLER_STATE,
@@ -950,6 +951,25 @@ struct hlsl_ir_stateblock_constant
     char *name;
 };
 
+enum hlsl_interlocked_op
+{
+    HLSL_INTERLOCKED_ADD,
+};
+
+/* Represents an interlocked operation.
+ *
+ * The data_type of the node indicates whether or not the original value is returned.
+ * If the original value is not returned, the data_type is set to NULL.
+ * Otherwise, the data_type is set to the type of the original value.
+ */
+struct hlsl_ir_interlocked
+{
+    struct hlsl_ir_node node;
+    enum hlsl_interlocked_op op;
+    struct hlsl_deref dst;
+    struct hlsl_src coords, cmp_value, value;
+};
+
 struct hlsl_scope
 {
     /* Item entry for hlsl_ctx.scopes. */
@@ -1247,6 +1267,12 @@ static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *n
     return CONTAINING_RECORD(node, struct hlsl_ir_switch, node);
 }
 
+static inline struct hlsl_ir_interlocked *hlsl_ir_interlocked(const struct hlsl_ir_node *node)
+{
+    VKD3D_ASSERT(node->type == HLSL_IR_INTERLOCKED);
+    return CONTAINING_RECORD(node, struct hlsl_ir_interlocked, node);
+}
+
 static inline struct hlsl_ir_compile *hlsl_ir_compile(const struct hlsl_ir_node *node)
 {
     VKD3D_ASSERT(node->type == HLSL_IR_COMPILE);
@@ -1554,6 +1580,9 @@ struct hlsl_ir_node *hlsl_new_compile(struct hlsl_ctx *ctx, enum hlsl_compile_ty
         struct hlsl_block *args_instrs, const struct vkd3d_shader_location *loc);
 struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val,
         struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc);
+struct hlsl_ir_node *hlsl_new_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op, struct hlsl_type *type,
+        const struct hlsl_deref *dst, struct hlsl_ir_node *coords, struct hlsl_ir_node *cmp_value,
+        struct hlsl_ir_node *value, const struct vkd3d_shader_location *loc);
 struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, struct hlsl_block *iter,
         struct hlsl_block *block, enum hlsl_loop_unroll_type unroll_type,
         unsigned int unroll_limit, const struct vkd3d_shader_location *loc);
diff --git a/libs/vkd3d-shader/hlsl.y b/libs/vkd3d-shader/hlsl.y
index e5a03067..686037f8 100644
--- a/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d-shader/hlsl.y
@@ -667,6 +667,7 @@ static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx
                 case HLSL_IR_RESOURCE_LOAD:
                 case HLSL_IR_RESOURCE_STORE:
                 case HLSL_IR_SWITCH:
+                case HLSL_IR_INTERLOCKED:
                 case HLSL_IR_STATEBLOCK_CONSTANT:
                     hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
                             "Expected literal expression.");
@@ -5374,6 +5375,114 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx,
     return true;
 }
 
+/* Common implementation of the Interlocked*() intrinsics: validates the
+ * arguments, emits an HLSL_IR_INTERLOCKED node for "op", and, when a third
+ * argument is given, stores the original value into it. */
+static bool intrinsic_interlocked(struct hlsl_ctx *ctx, enum hlsl_interlocked_op op,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name)
+{
+    struct hlsl_ir_node *lhs, *coords, *val, *orig_val = NULL;
+    struct hlsl_ir_node *interlocked, *void_ret;
+    struct hlsl_type *lhs_type, *val_type;
+    struct vkd3d_string_buffer *string;
+    struct hlsl_deref dst_deref;
+
+    if (hlsl_version_lt(ctx, 5, 0))
+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+                "Interlocked functions can only be used in shader model 5.0 or higher.");
+
+    if (params->args_count != 2 && params->args_count != 3)
+    {
+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
+                "Unexpected number of arguments to function '%s': expected 2 or 3, but got %u.",
+                name, params->args_count);
+        return false;
+    }
+
+    lhs = params->args[0];
+    lhs_type = lhs->data_type;
+
+    if (lhs_type->class != HLSL_CLASS_SCALAR || (lhs_type->e.numeric.type != HLSL_TYPE_UINT
+            && lhs_type->e.numeric.type != HLSL_TYPE_INT))
+    {
+        if ((string = hlsl_type_to_string(ctx, lhs_type)))
+        {
+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
+                    "Unexpected type for argument 0 of '%s': expected 'uint' or 'int', but got '%s'.",
+                    name, string->buffer);
+            hlsl_release_string_buffer(ctx, string);
+        }
+        return false;
+    }
+
+    /* Interlocked*() functions always take uint for the value parameters,
+     * except for InterlockedMax()/InterlockedMin(). */
+    val_type = hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT);
+    if (!(val = add_implicit_conversion(ctx, params->instrs, params->args[1], val_type, loc)))
+        return false;
+
+    if (params->args_count == 3)
+        orig_val = params->args[2];
+
+    /* TODO: groupshared variables */
+    if (lhs->type == HLSL_IR_INDEX && hlsl_index_chain_has_resource_access(hlsl_ir_index(lhs)))
+    {
+        if (!hlsl_index_is_resource_access(hlsl_ir_index(lhs)))
+        {
+            hlsl_fixme(ctx, &lhs->loc, "Non-direct structured resource interlocked targets.");
+            return false;
+        }
+
+        if (!hlsl_init_deref_from_index_chain(ctx, &dst_deref, hlsl_ir_index(lhs)->val.node))
+            return false;
+        coords = hlsl_ir_index(lhs)->idx.node;
+
+        VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR);
+        VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT);
+
+        if (hlsl_deref_get_type(ctx, &dst_deref)->class != HLSL_CLASS_UAV)
+        {
+            hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements.");
+            /* dst_deref was initialised above and is not yet owned by a node. */
+            hlsl_cleanup_deref(&dst_deref);
+            return false;
+        }
+    }
+    else
+    {
+        hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Interlocked targets must be UAV elements.");
+        return false;
+    }
+
+    interlocked = hlsl_new_interlocked(ctx, op, orig_val ? lhs_type : NULL, &dst_deref, coords, NULL, val, loc);
+    hlsl_cleanup_deref(&dst_deref);
+    if (!interlocked)
+        return false;
+    hlsl_block_add_instr(params->instrs, interlocked);
+
+    if (orig_val)
+    {
+        if (orig_val->data_type->modifiers & HLSL_MODIFIER_CONST)
+            hlsl_error(ctx, &orig_val->loc, VKD3D_SHADER_ERROR_HLSL_MODIFIES_CONST,
+                    "Output argument to '%s' is const.", name);
+
+        if (!add_assignment(ctx, params->instrs, orig_val, ASSIGN_OP_ASSIGN, interlocked))
+            return false;
+    }
+
+    if (!(void_ret = hlsl_new_void_expr(ctx, loc)))
+        return false;
+    hlsl_block_add_instr(params->instrs, void_ret);
+
+    return true;
+}
+
+static bool intrinsic_InterlockedAdd(struct hlsl_ctx *ctx,
+        const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+    return intrinsic_interlocked(ctx, HLSL_INTERLOCKED_ADD, params, loc, "InterlockedAdd");
+}
+
 static const struct intrinsic_function
 {
     const char *name;
@@ -5387,6 +5496,7 @@ intrinsic_functions[] =
     /* Note: these entries should be kept in alphabetical order. */
     {"D3DCOLORtoUBYTE4",                    1, true,  intrinsic_d3dcolor_to_ubyte4},
     {"GetRenderTargetSampleCount",          0, true,  intrinsic_GetRenderTargetSampleCount},
+    {"InterlockedAdd",                     -1, true,  intrinsic_InterlockedAdd},
     {"abs",                                 1, true,  intrinsic_abs},
     {"acos",                                1, true,  intrinsic_acos},
     {"all",                                 1, true,  intrinsic_all},
diff --git a/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d-shader/hlsl_codegen.c
index 1c5a0490..40891a61 100644
--- a/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d-shader/hlsl_codegen.c
@@ -739,6 +739,10 @@ static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
             res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr);
             return res;
 
+        case HLSL_IR_INTERLOCKED:
+            res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr);
+            return res;
+
         default:
             return false;
     }
@@ -1836,6 +1840,15 @@ static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx,
     return progress;
 }
 
+static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx,
+        struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state)
+{
+    bool progress = false;
+
+    progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index);
+    return progress;
+}
+
 static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
         struct copy_propagation_state *state)
 {
@@ -2042,6 +2055,10 @@ static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_b
                 progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state);
                 break;
 
+            case HLSL_IR_INTERLOCKED:
+                progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state);
+                break;
+
             default:
                 break;
         }
@@ -2225,6 +2242,24 @@ static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *ins
             validate_component_index_range_from_deref(ctx, &store->lhs);
             break;
         }
+        case HLSL_IR_INTERLOCKED:
+        {
+            struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
+
+            if (!interlocked->dst.var->is_uniform)
+            {
+                hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
+                        "Accessed resource must have a single uniform source.");
+            }
+            else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT)
+            {
+                hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
+                        "Accessed resource from \"%s\" must be determinable at compile time.",
+                        interlocked->dst.var->name);
+                note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource");
+            }
+            break;
+        }
         default:
             break;
     }
@@ -4478,6 +4513,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
         case HLSL_IR_LOOP:
         case HLSL_IR_RESOURCE_STORE:
         case HLSL_IR_SWITCH:
+        case HLSL_IR_INTERLOCKED:
             break;
         case HLSL_IR_STATEBLOCK_CONSTANT:
             /* Stateblock constants should not appear in the shader program. */
@@ -4724,6 +4760,19 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop
             index->idx.node->last_read = last_read;
             break;
         }
+        case HLSL_IR_INTERLOCKED:
+        {
+            struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
+
+            var = interlocked->dst.var;
+            var->last_read = max(var->last_read, last_read);
+            deref_mark_last_read(&interlocked->dst, last_read);
+            interlocked->coords.node->last_read = last_read;
+            interlocked->value.node->last_read = last_read;
+            if (interlocked->cmp_value.node)
+                interlocked->cmp_value.node->last_read = last_read;
+            break;
+        }
         case HLSL_IR_JUMP:
         {
             struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
@@ -5135,6 +5184,10 @@ static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *in
             register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource);
             break;
 
+        case HLSL_IR_INTERLOCKED:
+            register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst);
+            break;
+
         default:
             break;
     }
@@ -9942,6 +9995,46 @@ static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
     }
 }
 
+static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx,
+        struct vsir_program *program, struct hlsl_ir_interlocked *interlocked)
+{
+    static const enum vkd3d_shader_opcode opcodes[] =
+    {
+        [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD,
+    };
+
+    static const enum vkd3d_shader_opcode imm_opcodes[] =
+    {
+        [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD,
+    };
+
+    struct hlsl_ir_node *coords = interlocked->coords.node, *value = interlocked->value.node;
+    struct hlsl_ir_node *instr = &interlocked->node;
+    bool is_imm = interlocked->node.reg.allocated;
+    struct vkd3d_shader_dst_param *dst_param;
+    struct vkd3d_shader_instruction *ins;
+    enum vkd3d_shader_opcode opcode;
+
+    /* When the node has a register allocated for its result, use the imm_atomic
+     * variant, which takes that register as an extra first destination. */
+    opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op];
+    if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, is_imm ? 2 : 1, 2)))
+        return false;
+
+    if (is_imm)
+        vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+
+    dst_param = is_imm ? &ins->dst[1] : &ins->dst[0];
+    if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, dst_param, &interlocked->dst, &instr->loc, 0))
+        return false;
+    dst_param->reg.dimension = VSIR_DIMENSION_NONE;
+
+    vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
+    vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL);
+
+    return true;
+}
+
 static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
         struct vsir_program *program, const struct hlsl_ir_jump *jump)
 {
@@ -10119,6 +10212,10 @@ static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *blo
                 generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
                 break;
 
+            case HLSL_IR_INTERLOCKED:
+                sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr));
+                break;
+
             default:
                 break;
         }
diff --git a/libs/vkd3d-shader/tpf.c b/libs/vkd3d-shader/tpf.c
index aa666086..957ddb10 100644
--- a/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d-shader/tpf.c
@@ -4026,6 +4026,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
             break;
 
         case VKD3DSIH_ADD:
+        case VKD3DSIH_ATOMIC_IADD:
         case VKD3DSIH_AND:
         case VKD3DSIH_BREAK:
         case VKD3DSIH_CASE:
@@ -4068,6 +4069,7 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
         case VKD3DSIH_IMAD:
         case VKD3DSIH_IMAX:
         case VKD3DSIH_IMIN:
+        case VKD3DSIH_IMM_ATOMIC_IADD:
         case VKD3DSIH_IMUL:
         case VKD3DSIH_INE:
         case VKD3DSIH_INEG: