vkd3d-shader/dxil: Implement the DXIL ATOMICRMW instruction.

This commit is contained in:
Conor McCarthy 2024-03-01 16:03:47 +10:00 committed by Alexandre Julliard
parent b708a9b3b5
commit 9e0b9c3a7a
Notes: Alexandre Julliard 2024-03-27 23:07:08 +01:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/707
4 changed files with 209 additions and 2 deletions

View File

@ -448,6 +448,23 @@ static void shader_dump_global_flags(struct vkd3d_d3d_asm_compiler *compiler,
vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags); vkd3d_string_buffer_printf(&compiler->buffer, "unknown_flags(%#"PRIx64")", (uint64_t)global_flags);
} }
/* Append the modifier suffixes for an atomic read-modify-write instruction's
 * flags to the disassembly buffer. Recognised bits are consumed as they are
 * printed; anything left over is emitted as "_unknown_flags(...)" so that no
 * flag is ever silently dropped. */
static void shader_dump_atomic_op_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t atomic_flags)
{
    static const struct
    {
        uint32_t flag;
        const char *suffix;
    }
    suffixes[] =
    {
        {VKD3DARF_SEQ_CST, "_seqCst"},
        {VKD3DARF_VOLATILE, "_volatile"},
    };
    unsigned int i;

    for (i = 0; i < sizeof(suffixes) / sizeof(*suffixes); ++i)
    {
        if (!(atomic_flags & suffixes[i].flag))
            continue;
        vkd3d_string_buffer_printf(&compiler->buffer, "%s", suffixes[i].suffix);
        atomic_flags &= ~suffixes[i].flag;
    }

    if (atomic_flags)
        vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", atomic_flags);
}
static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags) static void shader_dump_sync_flags(struct vkd3d_d3d_asm_compiler *compiler, uint32_t sync_flags)
{ {
if (sync_flags & VKD3DSSF_GLOBAL_UAV) if (sync_flags & VKD3DSSF_GLOBAL_UAV)
@ -1734,6 +1751,19 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile
} }
break; break;
case VKD3DSIH_IMM_ATOMIC_CMP_EXCH:
case VKD3DSIH_IMM_ATOMIC_IADD:
case VKD3DSIH_IMM_ATOMIC_AND:
case VKD3DSIH_IMM_ATOMIC_IMAX:
case VKD3DSIH_IMM_ATOMIC_IMIN:
case VKD3DSIH_IMM_ATOMIC_OR:
case VKD3DSIH_IMM_ATOMIC_UMAX:
case VKD3DSIH_IMM_ATOMIC_UMIN:
case VKD3DSIH_IMM_ATOMIC_EXCH:
case VKD3DSIH_IMM_ATOMIC_XOR:
shader_dump_atomic_op_flags(compiler, ins->flags);
break;
case VKD3DSIH_SYNC: case VKD3DSIH_SYNC:
shader_dump_sync_flags(compiler, ins->flags); shader_dump_sync_flags(compiler, ins->flags);
break; break;

View File

@ -463,6 +463,32 @@ enum dxil_predicate
ICMP_SLE = 41, ICMP_SLE = 41,
}; };
/* Operation codes for the DXIL/LLVM ATOMICRMW instruction. The values mirror
 * the LLVM bitcode encoding of atomicrmw binary operations (llvm::AtomicRMWInst::BinOp)
 * — TODO confirm against the exact LLVM 3.7 encoding DXIL is based on. */
enum dxil_rmw_code
{
    RMW_XCHG = 0,
    RMW_ADD = 1,
    RMW_SUB = 2,
    RMW_AND = 3,
    RMW_NAND = 4,
    RMW_OR = 5,
    RMW_XOR = 6,
    RMW_MAX = 7,
    RMW_MIN = 8,
    RMW_UMAX = 9,
    RMW_UMIN = 10,
};
/* Atomic memory ordering values as encoded in LLVM bitcode records.
 * sm6_parser_emit_atomicrmw() currently handles ORDERING_SEQCST only, since
 * HLSL provides no way to specify a weaker ordering. */
enum dxil_atomic_ordering
{
    ORDERING_NOTATOMIC = 0,
    ORDERING_UNORDERED = 1,
    ORDERING_MONOTONIC = 2,
    ORDERING_ACQUIRE = 3,
    ORDERING_RELEASE = 4,
    ORDERING_ACQREL = 5,
    ORDERING_SEQCST = 6,
};
enum dxil_atomic_binop_code enum dxil_atomic_binop_code
{ {
ATOMIC_BINOP_ADD, ATOMIC_BINOP_ADD,
@ -2593,6 +2619,18 @@ static bool sm6_value_validate_is_bool(const struct sm6_value *value, struct sm6
return true; return true;
} }
static bool sm6_value_validate_is_pointer_to_i32(const struct sm6_value *value, struct sm6_parser *sm6)
{
if (!sm6_type_is_pointer(value->type) || !sm6_type_is_i32(value->type->u.pointer.type))
{
WARN("Operand result type %u is not a pointer to i32.\n", value->type->class);
vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
"An int32 pointer operand passed to a DXIL instruction is not an int32 pointer.");
return false;
}
return true;
}
static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx) static const struct sm6_value *sm6_parser_get_value_safe(struct sm6_parser *sm6, unsigned int idx)
{ {
if (idx < sm6->value_count) if (idx < sm6->value_count)
@ -3507,6 +3545,9 @@ struct function_emission_state
unsigned int temp_idx; unsigned int temp_idx;
}; };
static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs,
unsigned int component_count, struct function_emission_state *state, struct vkd3d_shader_register *reg);
static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record, static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_record *record,
struct vkd3d_shader_instruction *ins, struct sm6_value *dst) struct vkd3d_shader_instruction *ins, struct sm6_value *dst)
{ {
@ -3582,6 +3623,129 @@ static void sm6_parser_emit_alloca(struct sm6_parser *sm6, const struct dxil_rec
sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst); sm6_parser_declare_indexable_temp(sm6, elem_type, type[0]->u.array.count, alignment, true, 0, ins, dst);
} }
static enum vkd3d_shader_opcode map_dx_atomicrmw_op(uint64_t code)
{
switch (code)
{
case RMW_ADD:
return VKD3DSIH_IMM_ATOMIC_IADD;
case RMW_AND:
return VKD3DSIH_IMM_ATOMIC_AND;
case RMW_MAX:
return VKD3DSIH_IMM_ATOMIC_IMAX;
case RMW_MIN:
return VKD3DSIH_IMM_ATOMIC_IMIN;
case RMW_OR:
return VKD3DSIH_IMM_ATOMIC_OR;
case RMW_UMAX:
return VKD3DSIH_IMM_ATOMIC_UMAX;
case RMW_UMIN:
return VKD3DSIH_IMM_ATOMIC_UMIN;
case RMW_XCHG:
return VKD3DSIH_IMM_ATOMIC_EXCH;
case RMW_XOR:
return VKD3DSIH_IMM_ATOMIC_XOR;
default:
/* DXIL currently doesn't use SUB and NAND. */
return VKD3DSIH_INVALID;
}
}
/* Parse a DXIL ATOMICRMW record and emit the matching VSIR immediate atomic
 * instruction (VKD3DSIH_IMM_ATOMIC_*).
 * Record operands, in order: pointer, source value, rmw operation code,
 * volatile flag, atomic ordering, synchronisation scope.
 * Only pointers to groupshared i32 are accepted here; UAV atomics arrive via
 * dx intrinsics instead. On any validation failure the function returns
 * without emitting an instruction. */
static void sm6_parser_emit_atomicrmw(struct sm6_parser *sm6, const struct dxil_record *record,
        struct function_emission_state *state, struct sm6_value *dst)
{
    struct vkd3d_shader_register coord, const_offset, const_zero;
    const struct vkd3d_shader_register *regs[2];
    struct vkd3d_shader_dst_param *dst_params;
    struct vkd3d_shader_src_param *src_params;
    struct vkd3d_shader_instruction *ins;
    const struct sm6_value *ptr, *src;
    enum vkd3d_shader_opcode op;
    unsigned int i = 0;
    bool is_volatile;
    uint64_t code;

    if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i))
            || !sm6_value_validate_is_pointer_to_i32(ptr, sm6))
        return;

    if (ptr->u.reg.type != VKD3DSPR_GROUPSHAREDMEM)
    {
        WARN("Register is not groupshared.\n");
        vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
                "The destination register for an atomicrmw instruction is not groupshared memory.");
        return;
    }

    dst->type = ptr->type->u.pointer.type;

    if (!(src = sm6_parser_get_value_by_ref(sm6, record, dst->type, &i)))
        return;

    if (!dxil_record_validate_operand_count(record, i + 4, i + 4, sm6))
        return;

    if ((op = map_dx_atomicrmw_op(code = record->operands[i++])) == VKD3DSIH_INVALID)
    {
        FIXME("Unhandled atomicrmw op %"PRIu64".\n", code);
        vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND,
                "Operation %"PRIu64" for an atomicrmw instruction is unhandled.", code);
        return;
    }

    is_volatile = record->operands[i++];

    /* It's currently not possible to specify an atomic ordering in HLSL, and it defaults to seq_cst. */
    if ((code = record->operands[i++]) != ORDERING_SEQCST)
        FIXME("Unhandled atomic ordering %"PRIu64".\n", code);

    if ((code = record->operands[i]) != 1)
        WARN("Ignoring synchronisation scope %"PRIu64".\n", code);

    if (ptr->structure_stride)
    {
        /* Structured TGSM access: build a two-component coordinate of
         * {structure index, byte offset 0}. */
        if (ptr->u.reg.idx[1].rel_addr)
        {
            regs[0] = &ptr->u.reg.idx[1].rel_addr->reg;
        }
        else
        {
            register_make_constant_uint(&const_offset, ptr->u.reg.idx[1].offset);
            regs[0] = &const_offset;
        }
        register_make_constant_uint(&const_zero, 0);
        regs[1] = &const_zero;
        if (!sm6_parser_emit_reg_composite_construct(sm6, regs, 2, state, &coord))
            return;
    }

    ins = state->ins;
    vsir_instruction_init(ins, &sm6->p.location, op);
    ins->flags = is_volatile ? VKD3DARF_SEQ_CST | VKD3DARF_VOLATILE : VKD3DARF_SEQ_CST;

    if (!(src_params = instruction_src_params_alloc(ins, 2, sm6)))
        return;
    if (ptr->structure_stride)
        src_param_init_vector_from_reg(&src_params[0], &coord);
    else
        src_param_make_constant_uint(&src_params[0], 0);
    src_param_init_from_value(&src_params[1], src);

    /* The allocation must be checked before use, matching the
     * instruction_src_params_alloc() handling above. */
    if (!(dst_params = instruction_dst_params_alloc(ins, 2, sm6)))
        return;
    register_init_ssa_scalar(&dst_params[0].reg, dst->type, dst, sm6);
    dst_param_init(&dst_params[0]);
    dst_params[1].reg = ptr->u.reg;
    /* The groupshared register has data type UAV when accessed. */
    dst_params[1].reg.data_type = VKD3D_DATA_UAV;
    dst_params[1].reg.idx[1].rel_addr = NULL;
    dst_params[1].reg.idx[1].offset = ~0u;
    dst_params[1].reg.idx_count = 1;
    dst_param_init(&dst_params[1]);

    dst->u.reg = dst_params[0].reg;
}
static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a, static enum vkd3d_shader_opcode map_binary_op(uint64_t code, const struct sm6_type *type_a,
const struct sm6_type *type_b, struct sm6_parser *sm6) const struct sm6_type *type_b, struct sm6_parser *sm6)
{ {
@ -6676,6 +6840,13 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const
case FUNC_CODE_INST_ALLOCA: case FUNC_CODE_INST_ALLOCA:
sm6_parser_emit_alloca(sm6, record, ins, dst); sm6_parser_emit_alloca(sm6, record, ins, dst);
break; break;
case FUNC_CODE_INST_ATOMICRMW:
{
struct function_emission_state state = {code_block, ins};
sm6_parser_emit_atomicrmw(sm6, record, &state, dst);
sm6->p.program.temp_count = max(sm6->p.program.temp_count, state.temp_idx);
break;
}
case FUNC_CODE_INST_BINOP: case FUNC_CODE_INST_BINOP:
sm6_parser_emit_binop(sm6, record, ins, dst); sm6_parser_emit_binop(sm6, record, ins, dst);
break; break;

View File

@ -755,6 +755,12 @@ enum vkd3d_shader_uav_flags
VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100, VKD3DSUF_ORDER_PRESERVING_COUNTER = 0x100,
}; };
/* Modifier flags for immediate atomic read-modify-write instructions
 * (VKD3DSIH_IMM_ATOMIC_*), carried in vkd3d_shader_instruction.flags. */
enum vkd3d_shader_atomic_rmw_flags
{
    /* The operation uses sequentially-consistent memory ordering. */
    VKD3DARF_SEQ_CST = 0x1,
    /* The operation was marked volatile in the source IR. */
    VKD3DARF_VOLATILE = 0x2,
};
enum vkd3d_tessellator_domain enum vkd3d_tessellator_domain
{ {
VKD3D_TESSELLATOR_DOMAIN_LINE = 1, VKD3D_TESSELLATOR_DOMAIN_LINE = 1,

View File

@ -24,7 +24,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
} }
[test] [test]
todo dispatch 4 1 1 todo(sm<6) dispatch 4 1 1
probe uav 1 (0) rui (0) probe uav 1 (0) rui (0)
probe uav 1 (1) rui (33) probe uav 1 (1) rui (33)
probe uav 1 (2) rui (66) probe uav 1 (2) rui (66)
@ -54,7 +54,7 @@ void main(uint local_idx : SV_GroupIndex, uint group_id : SV_GroupID)
} }
[test] [test]
todo dispatch 4 1 1 todo(sm<6) dispatch 4 1 1
probe uav 1 (0) ri (0) probe uav 1 (0) ri (0)
probe uav 1 (1) ri (-31) probe uav 1 (1) ri (-31)
probe uav 1 (2) ri (-62) probe uav 1 (2) ri (-62)