vkd3d: Support signalling a fence once all outstanding work is submitted to Vulkan.

When the client acquires the Vulkan queue it has to ensure that
it is not submitting work before other work it depends on already
submitted through the Direct3D 12 API but currently in the internal
vkd3d queue. Currently we suggest enqueuing a fence signal and
then waiting for it before acquiring the Vulkan queue, which is
correct but excessive: it will wait not just for the work currently
in the queue to be submitted, but for it to be executed too,
introducing useless dependencies.

By adding a way to enqueue signalling a fence on the CPU side we
allow the client to wait for the currently outstanding work to
be submitted to Vulkan, but nothing more.
This commit is contained in:
Giovanni Mascellani 2025-01-16 12:53:01 +01:00 committed by Henri Verbeet
parent bdb8291f6c
commit 22d0841412
Notes: Henri Verbeet 2025-01-21 14:13:31 +01:00
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Giovanni Mascellani (@giomasce)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/1343
5 changed files with 139 additions and 3 deletions

View File

@ -411,9 +411,13 @@ VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue);
* the Vulkan driver as being submitted before other work submitted
* through the Direct3D 12 API. If this is not desired, it is
* recommended to synchronize work submission using an ID3D12Fence
* object, by submitting to the queue a signal operation after all the
* Direct3D 12 work is submitted and waiting for it before calling
* vkd3d_acquire_vk_queue().
* object:
* 1. submit work through the Direct3D 12 API;
* 2. call vkd3d_queue_signal_on_cpu();
* 3. wait for the fence to be signalled;
* 4. call vkd3d_acquire_vk_queue(); it is guaranteed that all work submitted
* at point 1 has already been submitted to Vulkan (though not necessarily
* executed).
*
* \since 1.0
*/
@ -466,6 +470,21 @@ VKD3D_API HRESULT vkd3d_create_versioned_root_signature_deserializer(const void
*/
VKD3D_API void vkd3d_set_log_callback(PFN_vkd3d_log callback);
/**
* Signal a fence on the CPU once all the currently outstanding queue work is
* submitted to Vulkan.
*
* The fence will be signalled on the CPU (as if ID3D12Fence_Signal() was
* called) once all the work submitted through the Direct3D 12 API before
* vkd3d_queue_signal_on_cpu() is called has left the internal queue and has
* been submitted to the underlying Vulkan queue. Read the documentation for
* vkd3d_acquire_vk_queue() for more details.
*
* The signal is only queued by this call. If signalling the fence fails when
* the queued operation is eventually processed, the failure is logged; it is
* not reported through the return value of this function.
*
* \param queue The command queue whose outstanding work the signal is
* ordered after.
*
* \param fence The fence to signal. A reference to the fence is held for as
* long as the signal operation is queued.
*
* \param value The value to signal the fence with.
*
* \return S_OK if the signal operation was queued, or E_OUTOFMEMORY if no
* space could be allocated for it in the internal queue.
*
* \since 1.15
*/
VKD3D_API HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *queue,
ID3D12Fence *fence, uint64_t value);
#endif /* VKD3D_NO_PROTOTYPES */
/*
@ -512,6 +531,10 @@ typedef HRESULT (*PFN_vkd3d_create_versioned_root_signature_deserializer)(const
/** Type of vkd3d_set_log_callback(). \since 1.4 */
typedef void (*PFN_vkd3d_set_log_callback)(PFN_vkd3d_log callback);
/** Type of vkd3d_queue_signal_on_cpu(). \since 1.15 */
typedef HRESULT (*PFN_vkd3d_queue_signal_on_cpu)(ID3D12CommandQueue *queue,
ID3D12Fence *fence, uint64_t value);
#ifdef __cplusplus
}
#endif /* __cplusplus */

View File

@ -6455,6 +6455,7 @@ static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op)
break;
case VKD3D_CS_OP_SIGNAL:
case VKD3D_CS_OP_SIGNAL_ON_CPU:
d3d12_fence_decref(op->u.signal.fence);
break;
@ -7445,6 +7446,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *
struct vkd3d_cs_op_data *op;
struct d3d12_fence *fence;
unsigned int i;
HRESULT hr;
queue->is_flushing = true;
@ -7478,6 +7480,11 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *
d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value);
break;
case VKD3D_CS_OP_SIGNAL_ON_CPU:
if (FAILED(hr = d3d12_fence_Signal(&op->u.signal.fence->ID3D12Fence1_iface, op->u.signal.value)))
ERR("Failed to signal fence %p, hr %s.\n", op->u.signal.fence, debugstr_hresult(hr));
break;
case VKD3D_CS_OP_EXECUTE:
d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count);
break;
@ -7620,6 +7627,36 @@ void vkd3d_release_vk_queue(ID3D12CommandQueue *queue)
return vkd3d_queue_release(d3d12_queue->vkd3d_queue);
}
/* Append a VKD3D_CS_OP_SIGNAL_ON_CPU op for the given fence and value to the
 * command queue's op queue, then kick op submission.
 *
 * Returns S_OK once the op has been queued, or E_OUTOFMEMORY if no slot
 * could be obtained for it in the op array. */
HRESULT vkd3d_queue_signal_on_cpu(ID3D12CommandQueue *iface, ID3D12Fence *fence_iface, uint64_t value)
{
    struct d3d12_command_queue *d3d12_queue = impl_from_ID3D12CommandQueue(iface);
    struct d3d12_fence *d3d12_fence = unsafe_impl_from_ID3D12Fence(fence_iface);
    struct vkd3d_cs_op_data *signal_op;
    HRESULT result;

    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);

    /* The op queue is only touched with op_mutex held. */
    vkd3d_mutex_lock(&d3d12_queue->op_mutex);

    if ((signal_op = d3d12_command_queue_op_array_require_space(&d3d12_queue->op_queue)))
    {
        signal_op->opcode = VKD3D_CS_OP_SIGNAL_ON_CPU;
        signal_op->u.signal.fence = d3d12_fence;
        signal_op->u.signal.value = value;
        /* The queued op owns a fence reference; it is dropped again when
         * the op is destroyed. */
        d3d12_fence_incref(d3d12_fence);

        d3d12_command_queue_submit_locked(d3d12_queue);
        result = S_OK;
    }
    else
    {
        ERR("Failed to add op.\n");
        result = E_OUTOFMEMORY;
    }

    vkd3d_mutex_unlock(&d3d12_queue->op_mutex);

    return result;
}
/* ID3D12CommandSignature */
static inline struct d3d12_command_signature *impl_from_ID3D12CommandSignature(ID3D12CommandSignature *iface)
{

View File

@ -17,6 +17,7 @@ global:
vkd3d_instance_from_device;
vkd3d_instance_get_vk_instance;
vkd3d_instance_incref;
vkd3d_queue_signal_on_cpu;
vkd3d_release_vk_queue;
vkd3d_resource_decref;
vkd3d_resource_incref;

View File

@ -1364,6 +1364,7 @@ enum vkd3d_cs_op
{
VKD3D_CS_OP_WAIT,
VKD3D_CS_OP_SIGNAL,
VKD3D_CS_OP_SIGNAL_ON_CPU,
VKD3D_CS_OP_EXECUTE,
VKD3D_CS_OP_UPDATE_MAPPINGS,
VKD3D_CS_OP_COPY_MAPPINGS,

View File

@ -1194,6 +1194,79 @@ static void test_application_info(void)
ID3D12Device_Release(device);
}
static void test_queue_signal_on_cpu(void)
{
PFN_vkd3d_queue_signal_on_cpu pfn_vkd3d_queue_signal_on_cpu = vkd3d_queue_signal_on_cpu;
D3D12_COMMAND_QUEUE_DESC queue_desc = {0};
ID3D12CommandQueue *queue, *queue2;
struct test_context context = {0};
struct test_context_desc desc;
ID3D12Fence *fence, *fence2;
unsigned int refcount;
enum vkd3d_result ret;
ID3D12Device *device;
HRESULT hr;
desc.no_render_target = true;
if (!init_test_context(&context, &desc))
return;
device = context.device;
queue = context.queue;
queue_desc.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
hr = ID3D12Device_CreateCommandQueue(device, &queue_desc, &IID_ID3D12CommandQueue, (void **)&queue2);
ok(hr == S_OK, "Couldn't create command queue, hr %#x.\n", hr);
hr = ID3D12Device_CreateFence(device, 0, 0, &IID_ID3D12Fence, (void **)&fence);
ok(hr == S_OK, "Couldn't create fence, hr %#x.\n", hr);
hr = ID3D12Device_CreateFence(device, 0, 0, &IID_ID3D12Fence, (void **)&fence2);
ok(hr == S_OK, "Couldn't create fence, hr %#x.\n", hr);
/* Queue signal on CPU immediately. */
ret = pfn_vkd3d_queue_signal_on_cpu(queue, fence, 1);
ok(ret == VKD3D_OK, "Couldn't queue signal on CPU, ret %#x.\n", ret);
hr = ID3D12Fence_SetEventOnCompletion(fence, 1, NULL);
ok(hr == S_OK, "Couldn't wait for fence, hr %#x.\n", hr);
/* Queue signal on CPU depending on a CPU-side signal. */
hr = ID3D12CommandQueue_Wait(queue, fence2, 2);
ok(hr == S_OK, "Couldn't queue wait, hr %#x.\n", hr);
ret = pfn_vkd3d_queue_signal_on_cpu(queue, fence, 2);
ok(ret == VKD3D_OK, "Couldn't queue signal on CPU, ret %#x.\n", ret);
hr = ID3D12Fence_Signal(fence2, 2);
ok(hr == S_OK, "Couldn't signal, hr %#x.\n", hr);
hr = ID3D12Fence_SetEventOnCompletion(fence, 2, NULL);
ok(hr == S_OK, "Couldn't wait for fence, hr %#x.\n", hr);
/* Queue signal on CPU depending on a GPU-side signal which is already satisfied. */
hr = ID3D12CommandQueue_Signal(queue, fence2, 3);
ok(hr == S_OK, "Couldn't queue signal, hr %#x.\n", hr);
hr = ID3D12CommandQueue_Wait(queue, fence2, 3);
ok(hr == S_OK, "Couldn't queue wait, hr %#x.\n", hr);
ret = pfn_vkd3d_queue_signal_on_cpu(queue, fence, 3);
ok(ret == VKD3D_OK, "Couldn't queue signal on CPU, ret %#x.\n", ret);
hr = ID3D12Fence_SetEventOnCompletion(fence, 3, NULL);
ok(hr == S_OK, "Couldn't wait for fence, hr %#x.\n", hr);
/* Queue signal on CPU depending on a GPU-side signal queued on another queue. */
hr = ID3D12CommandQueue_Wait(queue, fence2, 4);
ok(hr == S_OK, "Couldn't queue wait, hr %#x.\n", hr);
ret = pfn_vkd3d_queue_signal_on_cpu(queue, fence, 4);
ok(ret == VKD3D_OK, "Couldn't queue signal on CPU, ret %#x.\n", ret);
hr = ID3D12CommandQueue_Signal(queue2, fence2, 4);
ok(hr == S_OK, "Couldn't queue signal, hr %#x.\n", hr);
hr = ID3D12Fence_SetEventOnCompletion(fence, 4, NULL);
ok(hr == S_OK, "Couldn't wait for fence, hr %#x.\n", hr);
refcount = ID3D12CommandQueue_Release(queue2);
ok(refcount == 0, "%u references to command queue leaked.\n", refcount);
refcount = ID3D12Fence_Release(fence2);
ok(refcount == 0, "%u references to fence leaked.\n", refcount);
refcount = ID3D12Fence_Release(fence);
ok(refcount == 0, "%u references to fence leaked.\n", refcount);
destroy_test_context(&context);
}
static bool have_d3d12_device(void)
{
ID3D12Device *device;
@ -1241,4 +1314,5 @@ LOAD_VK_PFN(vkGetInstanceProcAddr)
run_test(test_external_resource_present_state);
run_test(test_formats);
run_test(test_application_info);
run_test(test_queue_signal_on_cpu);
}