From 0baf0302d1b342e73a61f6ec051a6fd700bbc09d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B3zef=20Kucia?= Date: Thu, 13 Jun 2019 13:47:34 +0200 Subject: [PATCH] vkd3d: Avoid holding mutex while waiting for fences. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We maintain separate arrays for enqueued fences and fences owned by the fence worker thread. Signed-off-by: Józef Kucia Signed-off-by: Henri Verbeet Signed-off-by: Alexandre Julliard --- include/private/vkd3d_common.h | 10 +- libs/vkd3d/command.c | 174 ++++++++++++++++++++++----------- libs/vkd3d/vkd3d_private.h | 15 +++ 3 files changed, 142 insertions(+), 57 deletions(-) diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index 24420924..16c31e63 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -118,7 +118,7 @@ static inline LONG InterlockedIncrement(LONG volatile *x) return __sync_add_and_fetch(x, 1); } # else -# error "InterlockedIncrement not implemented for this platform" +# error "InterlockedIncrement() not implemented for this platform" # endif /* HAVE_SYNC_ADD_AND_FETCH */ # if HAVE_SYNC_SUB_AND_FETCH @@ -127,8 +127,14 @@ static inline LONG InterlockedDecrement(LONG volatile *x) return __sync_sub_and_fetch(x, 1); } # else -# error "InterlockedDecrement not implemented for this platform" +# error "InterlockedDecrement() not implemented for this platform" # endif #endif /* _WIN32 */ +#if HAVE_SYNC_ADD_AND_FETCH +# define atomic_add_fetch(ptr, val) __sync_add_and_fetch(ptr, val) +#else +# error "atomic_add_fetch() not implemented for this platform" +#endif /* HAVE_SYNC_ADD_AND_FETCH */ + #endif /* __VKD3D_COMMON_H */ diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 25c8005d..7634e71b 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -257,27 +257,22 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker, return hresult_from_errno(rc); } - if 
(!vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size, - worker->fence_count + 1, sizeof(*worker->vk_fences))) - { - ERR("Failed to add GPU fence.\n"); - pthread_mutex_unlock(&worker->mutex); - return E_OUTOFMEMORY; - } - if (!vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size, - worker->fence_count + 1, sizeof(*worker->fences))) + if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size, + worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences))) { ERR("Failed to add GPU fence.\n"); pthread_mutex_unlock(&worker->mutex); return E_OUTOFMEMORY; } - worker->vk_fences[worker->fence_count] = vk_fence; - worker->fences[worker->fence_count].fence = fence; - worker->fences[worker->fence_count].value = value; - worker->fences[worker->fence_count].queue = queue; - worker->fences[worker->fence_count].queue_sequence_number = queue_sequence_number; - ++worker->fence_count; + worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence; + worker->enqueued_fences[worker->enqueued_fence_count].fence = fence; + worker->enqueued_fences[worker->enqueued_fence_count].value = value; + worker->enqueued_fences[worker->enqueued_fence_count].queue = queue; + worker->enqueued_fences[worker->enqueued_fence_count].queue_sequence_number = queue_sequence_number; + ++worker->enqueued_fence_count; + + InterlockedIncrement(&fence->pending_worker_operation_count); pthread_cond_signal(&worker->cond); pthread_mutex_unlock(&worker->mutex); @@ -287,37 +282,73 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker, static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, struct d3d12_fence *fence) { - struct d3d12_device *device = worker->device; - const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; - unsigned int i, j; + LONG count; int rc; + if (!(count = atomic_add_fetch(&fence->pending_worker_operation_count, 0))) + return; + + WARN("Waiting for %u 
pending fence operations (fence %p).\n", count, fence); + if ((rc = pthread_mutex_lock(&worker->mutex))) { ERR("Failed to lock mutex, error %d.\n", rc); return; } - for (i = 0, j = 0; i < worker->fence_count; ++i) + while ((count = atomic_add_fetch(&fence->pending_worker_operation_count, 0))) { - if (worker->fences[i].fence == fence) - { - VK_CALL(vkDestroyFence(device->vk_device, worker->vk_fences[i], NULL)); - continue; - } + TRACE("Still waiting for %u pending fence operations (fence %p).\n", count, fence); - if (i != j) - { - worker->vk_fences[j] = worker->vk_fences[i]; - worker->fences[j] = worker->fences[i]; - } - ++j; + worker->pending_fence_destruction = true; + pthread_cond_signal(&worker->cond); + + pthread_cond_wait(&worker->fence_destruction_cond, &worker->mutex); } - worker->fence_count = j; + + TRACE("Removed fence %p.\n", fence); pthread_mutex_unlock(&worker->mutex); } +static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker) +{ + unsigned int i; + size_t count; + bool ret; + + if (!worker->enqueued_fence_count) + return; + + count = worker->fence_count + worker->enqueued_fence_count; + + ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size, + count, sizeof(*worker->vk_fences)); + ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size, + count, sizeof(*worker->fences)); + if (!ret) + { + ERR("Failed to reserve memory.\n"); + return; + } + + for (i = 0; i < worker->enqueued_fence_count; ++i) + { + struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i]; + + worker->vk_fences[worker->fence_count] = current->vk_fence; + + worker->fences[worker->fence_count].fence = current->fence; + worker->fences[worker->fence_count].value = current->value; + worker->fences[worker->fence_count].queue = current->queue; + worker->fences[worker->fence_count].queue_sequence_number = current->queue_sequence_number; + + ++worker->fence_count; + } + assert(worker->fence_count == 
count); + worker->enqueued_fence_count = 0; +} + static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker) { struct d3d12_device *device = worker->device; @@ -351,6 +382,8 @@ static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker) if (FAILED(hr = d3d12_fence_signal(current->fence, current->value, vk_fence))) ERR("Failed to signal D3D12 fence, hr %#x.\n", hr); + InterlockedDecrement(&current->fence->pending_worker_operation_count); + vkd3d_queue_update_sequence_number(current->queue, current->queue_sequence_number, device); continue; } @@ -373,35 +406,48 @@ static void *vkd3d_fence_worker_main(void *arg) struct vkd3d_fence_worker *worker = arg; int rc; - vkd3d_set_thread_name("vkd3d_worker"); + vkd3d_set_thread_name("vkd3d_fence"); for (;;) { - if ((rc = pthread_mutex_lock(&worker->mutex))) - { - ERR("Failed to lock mutex, error %d.\n", rc); - return NULL; - } - - if (worker->should_exit && !worker->fence_count) - { - pthread_mutex_unlock(&worker->mutex); - break; - } - - if (!worker->fence_count) - { - if ((rc = pthread_cond_wait(&worker->cond, &worker->mutex))) - { - ERR("Failed to wait on condition variable, error %d.\n", rc); - pthread_mutex_unlock(&worker->mutex); - return NULL; - } - } - vkd3d_wait_for_gpu_fences(worker); - pthread_mutex_unlock(&worker->mutex); + if (!worker->fence_count || atomic_add_fetch(&worker->enqueued_fence_count, 0)) + { + if ((rc = pthread_mutex_lock(&worker->mutex))) + { + ERR("Failed to lock mutex, error %d.\n", rc); + break; + } + + if (worker->pending_fence_destruction) + { + pthread_cond_broadcast(&worker->fence_destruction_cond); + worker->pending_fence_destruction = false; + } + + if (worker->enqueued_fence_count) + { + vkd3d_fence_worker_move_enqueued_fences_locked(worker); + } + else + { + if (worker->should_exit) + { + pthread_mutex_unlock(&worker->mutex); + break; + } + + if ((rc = pthread_cond_wait(&worker->cond, &worker->mutex))) + { + ERR("Failed to wait on condition variable, error %d.\n", rc); 
+ pthread_mutex_unlock(&worker->mutex); + break; + } + } + + pthread_mutex_unlock(&worker->mutex); + } } return NULL; @@ -416,8 +462,13 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, TRACE("worker %p.\n", worker); worker->should_exit = false; + worker->pending_fence_destruction = false; worker->device = device; + worker->enqueued_fence_count = 0; + worker->enqueued_fences = NULL; + worker->enqueued_fences_size = 0; + worker->fence_count = 0; worker->vk_fences = NULL; @@ -438,11 +489,20 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker, return hresult_from_errno(rc); } + if ((rc = pthread_cond_init(&worker->fence_destruction_cond, NULL))) + { + ERR("Failed to initialize condition variable, error %d.\n", rc); + pthread_mutex_destroy(&worker->mutex); + pthread_cond_destroy(&worker->cond); + return hresult_from_errno(rc); + } + if (FAILED(hr = vkd3d_create_thread(device->vkd3d_instance, vkd3d_fence_worker_main, worker, &worker->thread))) { pthread_mutex_destroy(&worker->mutex); pthread_cond_destroy(&worker->cond); + pthread_cond_destroy(&worker->fence_destruction_cond); } return hr; @@ -472,7 +532,9 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, pthread_mutex_destroy(&worker->mutex); pthread_cond_destroy(&worker->cond); + pthread_cond_destroy(&worker->fence_destruction_cond); + vkd3d_free(worker->enqueued_fences); vkd3d_free(worker->vk_fences); vkd3d_free(worker->fences); @@ -1047,6 +1109,8 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device * memset(fence->old_vk_fences, 0, sizeof(fence->old_vk_fences)); + fence->pending_worker_operation_count = 0; + if (FAILED(hr = vkd3d_private_store_init(&fence->private_store))) { pthread_mutex_destroy(&fence->mutex); diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index 9f00d327..3af8a959 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -156,7 +156,20 @@ struct vkd3d_fence_worker union 
vkd3d_thread_handle thread; pthread_mutex_t mutex; pthread_cond_t cond; + pthread_cond_t fence_destruction_cond; bool should_exit; + bool pending_fence_destruction; + + size_t enqueued_fence_count; + struct vkd3d_enqueued_fence + { + VkFence vk_fence; + struct d3d12_fence *fence; + uint64_t value; + struct vkd3d_queue *queue; + uint64_t queue_sequence_number; + } *enqueued_fences; + size_t enqueued_fences_size; size_t fence_count; VkFence *vk_fences; @@ -317,6 +330,8 @@ struct d3d12_fence struct list semaphores; unsigned int semaphore_count; + LONG pending_worker_operation_count; + VkFence old_vk_fences[VKD3D_MAX_VK_SYNC_OBJECTS]; struct d3d12_device *device;