diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c
index 952675cf..09171fe4 100644
--- a/libs/vkd3d/command.c
+++ b/libs/vkd3d/command.c
@@ -46,6 +46,9 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
     object->vk_queue_flags = properties->queueFlags;
     object->timestamp_bits = properties->timestampValidBits;
 
+    object->wait_completion_semaphore = VK_NULL_HANDLE;
+    object->pending_wait_completion_value = 0;
+
     object->semaphores = NULL;
     object->semaphores_size = 0;
     object->semaphore_count = 0;
@@ -61,6 +64,20 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
     return S_OK;
 }
 
+bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device)
+{
+    VkResult vr;
+
+    if (!queue->wait_completion_semaphore
+            && (vr = vkd3d_create_timeline_semaphore(device, 0, &queue->wait_completion_semaphore)) < 0)
+    {
+        WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+        return false;
+    }
+
+    return true;
+}
+
 void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
 {
     const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
@@ -75,6 +92,8 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
 
     vkd3d_free(queue->semaphores);
 
+    VK_CALL(vkDestroySemaphore(device->vk_device, queue->wait_completion_semaphore, NULL));
+
     for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i)
     {
         if (queue->old_vk_semaphores[i])
@@ -268,6 +287,7 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker,
     }
 
     worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence;
+    worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = VK_NULL_HANDLE;
     waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence;
     waiting_fence->fence = fence;
     waiting_fence->value = value;
@@ -317,6 +337,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
 static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker)
 {
     unsigned int i;
+    bool timeline;
     size_t count;
     bool ret;
 
@@ -325,8 +346,18 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
 
     count = worker->fence_count + worker->enqueued_fence_count;
 
-    ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
-            count, sizeof(*worker->vk_fences));
+    if ((timeline = worker->device->use_timeline_semaphores))
+    {
+        ret = vkd3d_array_reserve((void **) &worker->vk_semaphores, &worker->vk_semaphores_size,
+                count, sizeof(*worker->vk_semaphores));
+        ret &= vkd3d_array_reserve((void **) &worker->semaphore_wait_values, &worker->semaphore_wait_values_size,
+                count, sizeof(*worker->semaphore_wait_values));
+    }
+    else
+    {
+        ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
+                count, sizeof(*worker->vk_fences));
+    }
     ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size,
             count, sizeof(*worker->fences));
     if (!ret)
@@ -339,7 +370,16 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
     {
         struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i];
 
-        worker->vk_fences[worker->fence_count] = current->vk_fence;
+        if (timeline)
+        {
+            worker->vk_semaphores[worker->fence_count] = current->vk_semaphore;
+            worker->semaphore_wait_values[worker->fence_count] = current->waiting_fence.value;
+        }
+        else
+        {
+            worker->vk_fences[worker->fence_count] = current->vk_fence;
+        }
+
         worker->fences[worker->fence_count] = current->waiting_fence;
         ++worker->fence_count;
     }
@@ -347,6 +387,66 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
     worker->enqueued_fence_count = 0;
 }
 
+static void vkd3d_wait_for_gpu_timeline_semaphores(struct vkd3d_fence_worker *worker)
+{
+    const struct d3d12_device *device = worker->device;
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    VkSemaphoreWaitInfoKHR wait_info;
+    VkSemaphore vk_semaphore;
+    uint64_t counter_value;
+    unsigned int i, j;
+    HRESULT hr;
+    int vr;
+
+    if (!worker->fence_count)
+        return;
+
+    wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR;
+    wait_info.pNext = NULL;
+    wait_info.flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR;
+    wait_info.pSemaphores = worker->vk_semaphores;
+    wait_info.semaphoreCount = worker->fence_count;
+    wait_info.pValues = worker->semaphore_wait_values;
+
+    vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0));
+    if (vr == VK_TIMEOUT)
+        return;
+    if (vr != VK_SUCCESS)
+    {
+        ERR("Failed to wait for Vulkan timeline semaphores, vr %d.\n", vr);
+        return;
+    }
+
+    for (i = 0, j = 0; i < worker->fence_count; ++i)
+    {
+        struct vkd3d_waiting_fence *current = &worker->fences[i];
+
+        vk_semaphore = worker->vk_semaphores[i];
+        if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, vk_semaphore, &counter_value))) < 0)
+        {
+            ERR("Failed to get Vulkan semaphore value, vr %d.\n", vr);
+        }
+        else if (counter_value >= current->value)
+        {
+            TRACE("Signaling fence %p value %#"PRIx64".\n", current->fence, current->value);
+            if (FAILED(hr = d3d12_fence_signal(current->fence, counter_value, VK_NULL_HANDLE)))
+                ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
+
+            InterlockedDecrement(&current->fence->pending_worker_operation_count);
+            continue;
+        }
+
+        if (i != j)
+        {
+            worker->vk_semaphores[j] = worker->vk_semaphores[i];
+            worker->semaphore_wait_values[j] = worker->semaphore_wait_values[i];
+            worker->fences[j] = worker->fences[i];
+        }
+        ++j;
+    }
+    worker->fence_count = j;
+}
+
 static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker)
 {
     struct d3d12_device *device = worker->device;
@@ -408,7 +508,7 @@ static void *vkd3d_fence_worker_main(void *arg)
 
     for (;;)
     {
-        vkd3d_wait_for_gpu_fences(worker);
+        worker->wait_for_gpu_fences(worker);
 
         if (!worker->fence_count || InterlockedAdd(&worker->enqueued_fence_count, 0))
         {
@@ -473,6 +573,13 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
     worker->vk_fences_size = 0;
     worker->fences = NULL;
    worker->fences_size = 0;
+    worker->vk_semaphores = NULL;
+    worker->vk_semaphores_size = 0;
+    worker->semaphore_wait_values = NULL;
+    worker->semaphore_wait_values_size = 0;
+
+    worker->wait_for_gpu_fences = device->use_timeline_semaphores
+            ? vkd3d_wait_for_gpu_timeline_semaphores : vkd3d_wait_for_gpu_fences;
 
     if ((rc = vkd3d_mutex_init(&worker->mutex)))
     {
@@ -535,6 +642,8 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
     vkd3d_free(worker->enqueued_fences);
     vkd3d_free(worker->vk_fences);
     vkd3d_free(worker->fences);
+    vkd3d_free(worker->vk_semaphores);
+    vkd3d_free(worker->semaphore_wait_values);
 
     return S_OK;
 }
@@ -684,6 +793,7 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence)
     }
 
     d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
+    VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
 
     vkd3d_mutex_unlock(&fence->mutex);
 }
@@ -802,31 +912,21 @@ static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence,
     return hr;
 }
 
-static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
+static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
 {
     struct d3d12_device *device = fence->device;
-    struct vkd3d_signaled_semaphore *current;
     bool signal_null_event_cond = false;
     unsigned int i, j;
-    int rc;
-
-    if ((rc = vkd3d_mutex_lock(&fence->mutex)))
-    {
-        ERR("Failed to lock mutex, error %d.\n", rc);
-        return hresult_from_errno(rc);
-    }
-
-    fence->value = value;
 
     for (i = 0, j = 0; i < fence->event_count; ++i)
     {
         struct vkd3d_waiting_event *current = &fence->events[i];
 
-        if (current->value <= value)
+        if (current->value <= fence->value)
        {
             if (current->event)
             {
-                fence->device->signal_event(current->event);
+                device->signal_event(current->event);
             }
             else
             {
@@ -841,9 +941,28 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF
             ++j;
         }
     }
+
     fence->event_count = j;
 
-    if (signal_null_event_cond)
+    return signal_null_event_cond;
+}
+
+static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
+{
+    struct d3d12_device *device = fence->device;
+    struct vkd3d_signaled_semaphore *current;
+    unsigned int i;
+    int rc;
+
+    if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return hresult_from_errno(rc);
+    }
+
+    fence->value = value;
+
+    if (d3d12_fence_signal_external_events_locked(fence))
         vkd3d_cond_broadcast(&fence->null_event_cond);
 
     if (vk_fence)
@@ -1069,12 +1188,160 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
     return S_OK;
 }
 
+static inline bool d3d12_fence_gpu_wait_is_completed(const struct d3d12_fence *fence, unsigned int i)
+{
+    const struct d3d12_device *device = fence->device;
+    const struct vkd3d_vk_device_procs *vk_procs;
+    uint64_t value;
+    VkResult vr;
+
+    vk_procs = &device->vk_procs;
+
+    if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device,
+            fence->gpu_waits[i].queue->wait_completion_semaphore, &value))) >= 0)
+    {
+        return value >= fence->gpu_waits[i].pending_value;
+    }
+
+    ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
+    return true;
+}
+
+static inline bool d3d12_fence_has_pending_gpu_ops_locked(struct d3d12_fence *fence)
+{
+    const struct d3d12_device *device = fence->device;
+    const struct vkd3d_vk_device_procs *vk_procs;
+    uint64_t value;
+    unsigned int i;
+    VkResult vr;
+
+    for (i = 0; i < fence->gpu_wait_count; ++i)
+    {
+        if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count)
+            fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count];
+    }
+    if (fence->gpu_wait_count)
+        return true;
+
+    /* Check for pending signals too. */
+    if (fence->value >= fence->pending_timeline_value)
+        return false;
+
+    vk_procs = &device->vk_procs;
+
+    /* Check the actual semaphore value in case fence->value update is lagging. */
+    if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, fence->timeline_semaphore, &value))) < 0)
+    {
+        ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
+        return false;
+    }
+
+    return value < fence->pending_timeline_value;
+}
+
+/* Replace the VkSemaphore with a new one to allow a lower value to be set. Ideally apps will
+ * only use this to reset the fence when no operations are pending on the queue. */
+static HRESULT d3d12_fence_reinit_timeline_semaphore_locked(struct d3d12_fence *fence, uint64_t value)
+{
+    const struct d3d12_device *device = fence->device;
+    const struct vkd3d_vk_device_procs *vk_procs;
+    VkSemaphore timeline_semaphore;
+    VkResult vr;
+
+    if (d3d12_fence_has_pending_gpu_ops_locked(fence))
+    {
+        /* This situation is not very likely because it means a fence with pending waits and/or signals was
+         * signalled on the CPU to a lower value. For now, emit a fixme so it can be patched if necessary.
+         * A patch already exists for this but it's not pretty. */
+        FIXME("Unable to re-initialise timeline semaphore to a lower value due to pending GPU ops.\n");
+        return E_FAIL;
+    }
+
+    if ((vr = vkd3d_create_timeline_semaphore(device, value, &timeline_semaphore)) < 0)
+    {
+        WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+        return hresult_from_vk_result(vr);
+    }
+
+    fence->value = value;
+    fence->pending_timeline_value = value;
+
+    WARN("Replacing timeline semaphore with a new object.\n");
+
+    vk_procs = &device->vk_procs;
+
+    VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
+    fence->timeline_semaphore = timeline_semaphore;
+
+    return S_OK;
+}
+
+static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
+{
+    const struct d3d12_device *device = fence->device;
+    VkSemaphoreSignalInfoKHR info;
+    HRESULT hr = S_OK;
+    VkResult vr;
+    int rc;
+
+    if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return hresult_from_errno(rc);
+    }
+
+    /* We must only signal a value which is greater than the current value.
+     * That value can be in the range of current known value (fence->value), or as large as pending_timeline_value.
+     * Pending timeline value signal might be blocked by another synchronization primitive, and thus statically
+     * cannot be that value, so the safest thing to do is to check the current value which is updated by the fence
+     * wait thread continuously. This check is technically racy since the value might be immediately out of date,
+     * but there is no way to avoid this. */
+    if (value > fence->value)
+    {
+        const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+
+        /* Sanity check against the delta limit. */
+        if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
+        {
+            FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
+                    value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
+        }
+
+        info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR;
+        info.pNext = NULL;
+        info.semaphore = fence->timeline_semaphore;
+        info.value = value;
+        if ((vr = VK_CALL(vkSignalSemaphoreKHR(device->vk_device, &info))) >= 0)
+        {
+            fence->value = value;
+            if (value > fence->pending_timeline_value)
+                fence->pending_timeline_value = value;
+        }
+        else
+        {
+            ERR("Failed to signal timeline semaphore, vr %d.\n", vr);
+            hr = hresult_from_vk_result(vr);
+        }
+    }
+    else if (value < fence->value)
+    {
+        hr = d3d12_fence_reinit_timeline_semaphore_locked(fence, value);
+    }
+
+    d3d12_fence_signal_external_events_locked(fence);
+
+    vkd3d_mutex_unlock(&fence->mutex);
+    return hr;
+}
+
 static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value)
 {
     struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);
 
     TRACE("iface %p, value %#"PRIx64".\n", iface, value);
 
+    if (fence->timeline_semaphore)
+        return d3d12_fence_signal_cpu_timeline_semaphore(fence, value);
     return d3d12_fence_signal(fence, value, VK_NULL_HANDLE);
 }
 
@@ -1108,6 +1375,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface)
 
 static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device,
         UINT64 initial_value, D3D12_FENCE_FLAGS flags)
 {
+    VkResult vr;
     HRESULT hr;
     int rc;
@@ -1136,6 +1404,16 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
     fence->events_size = 0;
     fence->event_count = 0;
 
+    fence->timeline_semaphore = VK_NULL_HANDLE;
+    if (device->use_timeline_semaphores && (vr = vkd3d_create_timeline_semaphore(device, initial_value,
+            &fence->timeline_semaphore)) < 0)
+    {
+        WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+        return hresult_from_vk_result(vr);
+    }
+    fence->pending_timeline_value = initial_value;
+    fence->gpu_wait_count = 0;
+
     list_init(&fence->semaphores);
     fence->semaphore_count = 0;
 
@@ -1172,6 +1450,25 @@ HRESULT d3d12_fence_create(struct d3d12_device *device,
     return S_OK;
 }
 
+VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value,
+        VkSemaphore *timeline_semaphore)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    VkSemaphoreTypeCreateInfoKHR type_info;
+    VkSemaphoreCreateInfo info;
+
+    info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
+    info.pNext = &type_info;
+    info.flags = 0;
+
+    type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR;
+    type_info.pNext = NULL;
+    type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR;
+    type_info.initialValue = initial_value;
+
+    return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore));
+}
+
 /* Command buffers */
 static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list,
         const char *message, ...)
@@ -6138,18 +6435,88 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i
     FIXME("iface %p stub!\n", iface);
 }
 
+static HRESULT d3d12_fence_update_gpu_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
+{
+    const struct d3d12_device *device = fence->device;
+    int rc;
+
+    if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return hresult_from_errno(rc);
+    }
+
+    /* If we're attempting to async signal a fence with a value which is not strictly increasing the payload value,
+     * warn about this case. Do not treat this as an error since it works at least with RADV and Nvidia drivers and
+     * there's no workaround on the GPU side. */
+    if (value <= fence->pending_timeline_value)
+    {
+        WARN("Fence %p values are not strictly increasing. Pending values: old %"PRIu64", new %"PRIu64".\n",
+                fence, fence->pending_timeline_value, value);
+    }
+    /* Sanity check against the delta limit. Use the current fence value. */
+    else if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
+    {
+        FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
+                value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
+    }
+    fence->pending_timeline_value = value;
+
+    vkd3d_mutex_unlock(&fence->mutex);
+
+    return S_OK;
+}
+
+static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, VkSemaphore vk_semaphore,
+        struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue)
+{
+    struct vkd3d_waiting_fence *waiting_fence;
+    int rc;
+
+    TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value);
+
+    if ((rc = vkd3d_mutex_lock(&worker->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return hresult_from_errno(rc);
+    }
+
+    if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size,
+            worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences)))
+    {
+        ERR("Failed to add GPU timeline semaphore.\n");
+        vkd3d_mutex_unlock(&worker->mutex);
+        return E_OUTOFMEMORY;
+    }
+
+    worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = vk_semaphore;
+    waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence;
+    waiting_fence->fence = fence;
+    waiting_fence->value = value;
+    waiting_fence->queue = queue;
+    ++worker->enqueued_fence_count;
+
+    InterlockedIncrement(&fence->pending_worker_operation_count);
+
+    vkd3d_cond_signal(&worker->cond);
+    vkd3d_mutex_unlock(&worker->mutex);
+
+    return S_OK;
+}
+
 static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface,
         ID3D12Fence *fence_iface, UINT64 value)
 {
     struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
+    VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
     const struct vkd3d_vk_device_procs *vk_procs;
     VkSemaphore vk_semaphore = VK_NULL_HANDLE;
     VkFence vk_fence = VK_NULL_HANDLE;
     struct vkd3d_queue *vkd3d_queue;
+    uint64_t sequence_number = 0;
     struct d3d12_device *device;
     struct d3d12_fence *fence;
     VkSubmitInfo submit_info;
-    uint64_t sequence_number;
     VkQueue vk_queue;
     VkResult vr;
     HRESULT hr;
@@ -6162,10 +6529,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
     fence = unsafe_impl_from_ID3D12Fence(fence_iface);
 
-    if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
+    if (device->use_timeline_semaphores)
     {
-        WARN("Failed to create Vulkan fence, vr %d.\n", vr);
-        goto fail_vkresult;
+        if (FAILED(hr = d3d12_fence_update_gpu_signal_timeline_semaphore(fence, value)))
+            return hr;
+
+        vk_semaphore = fence->timeline_semaphore;
+        assert(vk_semaphore);
+    }
+    else
+    {
+        if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
+        {
+            WARN("Failed to create Vulkan fence, vr %d.\n", vr);
+            goto fail_vkresult;
+        }
     }
 
     if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue)))
     {
@@ -6175,7 +6553,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
         goto fail;
     }
 
-    if ((vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0)
+    if (!device->use_timeline_semaphores && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue,
+            device, &vk_semaphore)) < 0)
     {
         ERR("Failed to create Vulkan semaphore, vr %d.\n", vr);
         vk_semaphore = VK_NULL_HANDLE;
@@ -6191,7 +6570,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
     submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0;
     submit_info.pSignalSemaphores = &vk_semaphore;
 
-    if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0)
+    if (device->use_timeline_semaphores)
+    {
+        timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+        timeline_submit_info.pNext = NULL;
+        timeline_submit_info.pSignalSemaphoreValues = &value;
+        timeline_submit_info.signalSemaphoreValueCount = submit_info.signalSemaphoreCount;
+        timeline_submit_info.waitSemaphoreValueCount = 0;
+        timeline_submit_info.pWaitSemaphoreValues = NULL;
+        submit_info.pNext = &timeline_submit_info;
+    }
+
+    vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence));
+    if (!device->use_timeline_semaphores && vr >= 0)
     {
         sequence_number = ++vkd3d_queue->submitted_sequence_number;
 
@@ -6208,6 +6599,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
         goto fail_vkresult;
     }
 
+    if (device->use_timeline_semaphores)
+        return vkd3d_enqueue_timeline_semaphore(&device->fence_worker, vk_semaphore, fence, value, vkd3d_queue);
+
     if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value)))
         vk_semaphore = VK_NULL_HANDLE;
 
@@ -6243,32 +6637,27 @@ fail_vkresult:
     hr = hresult_from_vk_result(vr);
 fail:
     VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL));
-    VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
+    if (!device->use_timeline_semaphores)
+        VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
     return hr;
 }
 
-static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
-        ID3D12Fence *fence_iface, UINT64 value)
+static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_queue *command_queue,
+        struct d3d12_fence *fence, uint64_t value)
 {
     static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
-    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
     const struct vkd3d_vk_device_procs *vk_procs;
     struct vkd3d_signaled_semaphore *semaphore;
     uint64_t completed_value = 0;
     struct vkd3d_queue *queue;
-    struct d3d12_fence *fence;
     VkSubmitInfo submit_info;
     VkQueue vk_queue;
     VkResult vr;
     HRESULT hr;
 
-    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
-
     vk_procs = &command_queue->device->vk_procs;
     queue = command_queue->vkd3d_queue;
 
-    fence = unsafe_impl_from_ID3D12Fence(fence_iface);
-
     semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value);
     if (!semaphore && completed_value >= value)
     {
@@ -6346,6 +6735,122 @@ fail:
     return hr;
 }
 
+static inline void d3d12_fence_update_gpu_wait(struct d3d12_fence *fence, const struct vkd3d_queue *queue)
+{
+    unsigned int i;
+    bool found;
+    int rc;
+
+    if ((rc = vkd3d_mutex_lock(&fence->mutex)))
+    {
+        ERR("Failed to lock mutex, error %d.\n", rc);
+        return;
+    }
+
+    for (i = 0, found = false; i < fence->gpu_wait_count; ++i)
+    {
+        if (fence->gpu_waits[i].queue == queue)
+        {
+            fence->gpu_waits[i].pending_value = queue->pending_wait_completion_value;
+            found = true;
+        }
+        else if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count)
+        {
+            fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count];
+        }
+    }
+
+    if (!found)
+    {
+        if (fence->gpu_wait_count < ARRAY_SIZE(fence->gpu_waits))
+        {
+            fence->gpu_waits[fence->gpu_wait_count].queue = queue;
+            fence->gpu_waits[fence->gpu_wait_count++].pending_value = queue->pending_wait_completion_value;
+        }
+        else
+        {
+            FIXME("Unable to track GPU fence wait.\n");
+        }
+    }
+
+    vkd3d_mutex_unlock(&fence->mutex);
+}
+
+static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_queue *command_queue,
+        struct d3d12_fence *fence, uint64_t value)
+{
+    static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
+    VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
+    const struct vkd3d_vk_device_procs *vk_procs;
+    struct vkd3d_queue *queue;
+    VkSubmitInfo submit_info;
+    VkQueue vk_queue;
+    VkResult vr;
+
+    vk_procs = &command_queue->device->vk_procs;
+    queue = command_queue->vkd3d_queue;
+
+    assert(fence->timeline_semaphore);
+    timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+    timeline_submit_info.pNext = NULL;
+    timeline_submit_info.signalSemaphoreValueCount = 0;
+    timeline_submit_info.pSignalSemaphoreValues = NULL;
+    timeline_submit_info.waitSemaphoreValueCount = 1;
+    timeline_submit_info.pWaitSemaphoreValues = &value;
+
+    if (!(vk_queue = vkd3d_queue_acquire(queue)))
+    {
+        ERR("Failed to acquire queue %p.\n", queue);
+        return E_FAIL;
+    }
+
+    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submit_info.pNext = &timeline_submit_info;
+    submit_info.waitSemaphoreCount = 1;
+    submit_info.pWaitSemaphores = &fence->timeline_semaphore;
+    submit_info.pWaitDstStageMask = &wait_stage_mask;
+    submit_info.commandBufferCount = 0;
+    submit_info.pCommandBuffers = NULL;
+    submit_info.signalSemaphoreCount = 0;
+    submit_info.pSignalSemaphores = NULL;
+
+    ++queue->pending_wait_completion_value;
+
+    submit_info.signalSemaphoreCount = 1;
+    submit_info.pSignalSemaphores = &queue->wait_completion_semaphore;
+    timeline_submit_info.signalSemaphoreValueCount = 1;
+    timeline_submit_info.pSignalSemaphoreValues = &queue->pending_wait_completion_value;
+
+    d3d12_fence_update_gpu_wait(fence, queue);
+
+    vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
+
+    vkd3d_queue_release(queue);
+
+    if (vr < 0)
+    {
+        WARN("Failed to submit wait operation, vr %d.\n", vr);
+        return hresult_from_vk_result(vr);
+    }
+
+    return S_OK;
+}
+
+static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
+        ID3D12Fence *fence_iface, UINT64 value)
+{
+    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
+    struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface);
+
+    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
+
+    if (command_queue->device->use_timeline_semaphores)
+        return d3d12_command_queue_wait_timeline_semaphore(command_queue, fence, value);
+
+    FIXME_ONCE("KHR_timeline_semaphore is not available or incompatible. Some wait commands may be unsupported.\n");
+    return d3d12_command_queue_wait_binary_semaphore(command_queue, fence, value);
+}
+
 static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface,
         UINT64 *frequency)
 {
diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c
index eb470c12..ab087d60 100644
--- a/libs/vkd3d/device.c
+++ b/libs/vkd3d/device.c
@@ -129,6 +129,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
     VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3),
     VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor),
     VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge),
+    VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore),
     /* EXT extensions */
     VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps),
     VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering),
@@ -791,6 +792,7 @@ struct vkd3d_physical_device_info
     VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
     VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties;
     VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties;
+    VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
 
     VkPhysicalDeviceProperties2KHR properties2;
 
@@ -803,6 +805,7 @@ struct vkd3d_physical_device_info
     VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features;
     VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features;
     VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features;
+    VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features;
 
     VkPhysicalDeviceFeatures2 features2;
 };
@@ -814,11 +817,13 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties;
     VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties;
     VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties;
+    VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties;
     VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features;
     VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features;
     VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features;
     VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features;
     VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features;
+    VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features;
     VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features;
     VkPhysicalDeviceMaintenance3Properties *maintenance3_properties;
     VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties;
@@ -838,6 +843,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     buffer_alignment_properties = &info->texel_buffer_alignment_properties;
     vertex_divisor_features = &info->vertex_divisor_features;
     vertex_divisor_properties = &info->vertex_divisor_properties;
+    timeline_semaphore_features = &info->timeline_semaphore_features;
+    timeline_semaphore_properties = &info->timeline_semaphore_properties;
     xfb_features = &info->xfb_features;
     xfb_properties = &info->xfb_properties;
 
@@ -859,6 +866,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     vk_prepend_struct(&info->features2, xfb_features);
     vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT;
     vk_prepend_struct(&info->features2, vertex_divisor_features);
+    timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR;
+    vk_prepend_struct(&info->features2, timeline_semaphore_features);
 
     if (vulkan_info->KHR_get_physical_device_properties2)
         VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2));
@@ -877,6 +886,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
     vk_prepend_struct(&info->properties2, xfb_properties);
     vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT;
     vk_prepend_struct(&info->properties2, vertex_divisor_properties);
+    timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR;
+    vk_prepend_struct(&info->properties2, timeline_semaphore_properties);
 
     if (vulkan_info->KHR_get_physical_device_properties2)
         VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2));
@@ -1465,6 +1476,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
     vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect;
     vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries;
     vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);
+    vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties;
 
     device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64;
     device->feature_options.OutputMergerLogicOp = features->logicOp;
@@ -1589,6 +1601,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
         vulkan_info->EXT_shader_demote_to_helper_invocation = false;
     if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment)
         vulkan_info->EXT_texel_buffer_alignment = false;
+    if (!physical_device_info->timeline_semaphore_features.timelineSemaphore)
+        vulkan_info->KHR_timeline_semaphore = false;
 
     vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties;
 
@@ -1939,6 +1953,75 @@ static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device)
             && info.Alignment <= 0x10000;
 }
 
+/* A lower value can be signalled on a D3D12 fence. Vulkan timeline semaphores
+ * do not support this, but test if it works anyway. */
+static bool d3d12_is_timeline_semaphore_supported(const struct d3d12_device *device)
+{
+    const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
+    VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
+    VkSemaphore timeline_semaphore;
+    VkSubmitInfo submit_info;
+    bool result = false;
+    uint64_t value = 0;
+    VkQueue vk_queue;
+    VkResult vr;
+
+    if (!device->vk_info.KHR_timeline_semaphore)
+        return false;
+
+    if ((vr = vkd3d_create_timeline_semaphore(device, 1, &timeline_semaphore)) < 0)
+    {
+        WARN("Failed to create timeline semaphore, vr %d.\n", vr);
+        return false;
+    }
+
+    if (!(vk_queue = vkd3d_queue_acquire(device->direct_queue)))
+    {
+        ERR("Failed to acquire queue %p.\n", device->direct_queue);
+        VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
+        return false;
+    }
+
+    submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
+    submit_info.pNext = &timeline_submit_info;
+    submit_info.waitSemaphoreCount = 0;
+    submit_info.pWaitSemaphores = NULL;
+    submit_info.pWaitDstStageMask = NULL;
+    submit_info.commandBufferCount = 0;
+    submit_info.pCommandBuffers = NULL;
+    submit_info.signalSemaphoreCount = 1;
+    submit_info.pSignalSemaphores = &timeline_semaphore;
+
+    timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
+    timeline_submit_info.pNext = NULL;
+    timeline_submit_info.pSignalSemaphoreValues = &value;
+    timeline_submit_info.signalSemaphoreValueCount = 1;
+    timeline_submit_info.waitSemaphoreValueCount = 0;
+    timeline_submit_info.pWaitSemaphoreValues = NULL;
+
+    vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
+
+    if (vr >= 0)
+    {
+        if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue))) < 0)
+            WARN("Failed to wait for queue, vr %d.\n", vr);
+
+        if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, timeline_semaphore, &value))) < 0)
+            ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
+        else if (!(result = !value))
+            WARN("Disabling timeline semaphore use due to incompatible behaviour.\n");
+    }
+    else
+    {
+        WARN("Failed to submit signal operation, vr %d.\n", vr);
+    }
+
+    vkd3d_queue_release(device->direct_queue);
+    VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
+
+    return result;
+}
+
 static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
         const struct vkd3d_device_create_info *create_info)
 {
@@ -2037,6 +2120,10 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
     }
 
     device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device);
+    device->use_timeline_semaphores = d3d12_is_timeline_semaphore_supported(device)
+            && vkd3d_queue_init_timeline_semaphore(device->direct_queue, device)
+            && vkd3d_queue_init_timeline_semaphore(device->compute_queue, device)
+            && vkd3d_queue_init_timeline_semaphore(device->copy_queue, device);
 
     TRACE("Created Vulkan device %p.\n", vk_device);
 
diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h
index 796dfefd..56060b6d 100644
--- a/libs/vkd3d/vkd3d_private.h
+++ b/libs/vkd3d/vkd3d_private.h
@@ -59,6 +59,7 @@
 #define VKD3D_MAX_SHADER_EXTENSIONS       3u
 #define VKD3D_MAX_SHADER_STAGES           5u
 #define VKD3D_MAX_VK_SYNC_OBJECTS         4u
+#define VKD3D_MAX_FENCE_WAITING_QUEUES    4u
 #define VKD3D_MAX_DESCRIPTOR_SETS        64u
 /* D3D12 binding tier 3 has a limit of 2048 samplers. */
 #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
@@ -125,6 +126,7 @@ struct vkd3d_vulkan_info
     bool KHR_maintenance3;
     bool KHR_push_descriptor;
     bool KHR_sampler_mirror_clamp_to_edge;
+    bool KHR_timeline_semaphore;
     /* EXT device extensions */
     bool EXT_calibrated_timestamps;
     bool EXT_conditional_rendering;
@@ -150,6 +152,8 @@ struct vkd3d_vulkan_info
 
     VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
 
+    VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
+
     unsigned int shader_extension_count;
     enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
 
@@ -348,6 +352,7 @@ struct vkd3d_fence_worker
     struct vkd3d_enqueued_fence
     {
         VkFence vk_fence;
+        VkSemaphore vk_semaphore;
         struct vkd3d_waiting_fence waiting_fence;
     } *enqueued_fences;
     size_t enqueued_fences_size;
@@ -357,6 +362,12 @@ struct vkd3d_fence_worker
     size_t vk_fences_size;
     struct vkd3d_waiting_fence *fences;
     size_t fences_size;
+    VkSemaphore *vk_semaphores;
+    size_t vk_semaphores_size;
+    uint64_t *semaphore_wait_values;
+    size_t semaphore_wait_values_size;
+
+    void (*wait_for_gpu_fences)(struct vkd3d_fence_worker *worker);
 
     struct d3d12_device *device;
 };
@@ -511,6 +522,12 @@ struct vkd3d_signaled_semaphore
     bool is_acquired;
 };
 
+struct vkd3d_pending_fence_wait
+{
+    const struct vkd3d_queue *queue;
+    uint64_t pending_value;
+};
+
 /* ID3D12Fence */
 struct d3d12_fence
 {
@@ -530,6 +547,11 @@ struct d3d12_fence
     size_t events_size;
     size_t event_count;
 
+    VkSemaphore timeline_semaphore;
+    uint64_t pending_timeline_value;
+    struct vkd3d_pending_fence_wait gpu_waits[VKD3D_MAX_FENCE_WAITING_QUEUES];
+    unsigned int gpu_wait_count;
+
     struct list semaphores;
     unsigned int semaphore_count;
 
@@ -545,6 +567,9 @@ struct d3d12_fence
 HRESULT d3d12_fence_create(struct d3d12_device *device,
         uint64_t initial_value, D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence);
 
+VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value,
+        VkSemaphore *timeline_semaphore);
+
 /* ID3D12Heap */
 struct d3d12_heap
 {
@@ -1284,6 +1309,9 @@ struct vkd3d_queue
     VkQueueFlags vk_queue_flags;
     uint32_t timestamp_bits;
 
+    VkSemaphore wait_completion_semaphore;
+    uint64_t pending_wait_completion_value;
+
     struct
     {
         VkSemaphore vk_semaphore;
@@ -1298,6 +1326,7 @@ struct vkd3d_queue
 VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue);
 HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index,
         const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue);
+bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device);
 void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device);
 void vkd3d_queue_release(struct vkd3d_queue *queue);
 
@@ -1456,6 +1485,7 @@ struct d3d12_device
     VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
     struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
     bool use_vk_heaps;
+    bool use_timeline_semaphores;
 };
 
 HRESULT d3d12_device_create(struct vkd3d_instance *instance,
diff --git a/libs/vkd3d/vulkan_procs.h b/libs/vkd3d/vulkan_procs.h
index 60556735..34e0ab4b 100644
--- a/libs/vkd3d/vulkan_procs.h
+++ b/libs/vkd3d/vulkan_procs.h
@@ -195,6 +195,11 @@ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR)
 
 /* VK_KHR_push_descriptor */
 VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR)
 
+/* VK_KHR_timeline_semaphore */
+VK_DEVICE_EXT_PFN(vkGetSemaphoreCounterValueKHR)
+VK_DEVICE_EXT_PFN(vkWaitSemaphoresKHR)
+VK_DEVICE_EXT_PFN(vkSignalSemaphoreKHR)
+
 /* VK_EXT_calibrated_timestamps */
 VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
 
diff --git a/tests/d3d12.c b/tests/d3d12.c
index 29bd40c8..2059b182 100644
--- a/tests/d3d12.c
+++ b/tests/d3d12.c
@@ -33240,7 +33240,9 @@ static void test_queue_wait(void)
     command_list = context.list;
     queue = context.queue;
 
-    queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
+    /* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail.
+     * Using a compute queue fixes this on most hardware, but it may still fail on low spec hardware. */
+    queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
 
     event = create_event();
     ok(event, "Failed to create event.\n");
@@ -33305,12 +33307,6 @@ static void test_queue_wait(void)
     check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
     release_resource_readback(&rb);
 
-    if (!vkd3d_test_platform_is_windows())
-    {
-        skip("Wait() is not implemented yet.\n"); /* FIXME */
-        goto skip_tests;
-    }
-
     /* Wait() before CPU signal */
     update_buffer_data(cb, 0, sizeof(blue), &blue);
     queue_wait(queue, fence, 2);
@@ -33386,7 +33382,6 @@ static void test_queue_wait(void)
     check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
     release_resource_readback(&rb);
 
-skip_tests:
     /* Signal() and Wait() in the same command queue */
     update_buffer_data(cb, 0, sizeof(blue), &blue);
     queue_signal(queue, fence, 7);