vkd3d: Write Vulkan descriptors in a worker thread.

Raises framerate by 5-10% in games which write thousands of descriptors
per frame, e.g. Horizon Zero Dawn.

The worker thread is a generic device worker which can also be used for
other purposes if the need arises.
This commit is contained in:
Conor McCarthy 2023-07-30 13:34:09 +10:00 committed by Alexandre Julliard
parent 70962ae7d8
commit 37e76618ca
Notes: Alexandre Julliard 2023-12-14 23:31:17 +01:00
Approved-by: Giovanni Mascellani (@giomasce)
Approved-by: Henri Verbeet (@hverbeet)
Approved-by: Alexandre Julliard (@julliard)
Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/292
4 changed files with 133 additions and 0 deletions

View File

@ -2644,6 +2644,8 @@ static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list
{
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
vkd3d_cond_signal(&list->device->worker_cond);
if (list->current_pipeline != VK_NULL_HANDLE)
return true;
@ -2665,6 +2667,8 @@ static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_lis
VkRenderPass vk_render_pass;
VkPipeline vk_pipeline;
vkd3d_cond_signal(&list->device->worker_cond);
if (list->current_pipeline != VK_NULL_HANDLE)
return true;

View File

@ -2495,6 +2495,28 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device5 *iface)
return refcount;
}
/* Ask the device worker thread to exit and wait for it to finish.
 *
 * The exit flag is set and the condition signalled while worker_mutex is
 * held. The worker's mutex and condition variable are destroyed only once
 * the thread has been joined successfully; if the join fails, its HRESULT
 * is returned and both objects are left intact. Returns S_OK otherwise. */
static HRESULT device_worker_stop(struct d3d12_device *device)
{
    HRESULT hr;

    TRACE("device %p.\n", device);

    /* Publish the exit request and wake the worker under the lock. */
    vkd3d_mutex_lock(&device->worker_mutex);
    device->worker_should_exit = true;
    vkd3d_cond_signal(&device->worker_cond);
    vkd3d_mutex_unlock(&device->worker_mutex);

    hr = vkd3d_join_thread(device->vkd3d_instance, &device->worker_thread);
    if (FAILED(hr))
        return hr;

    /* The thread has been joined, so the synchronisation objects can go. */
    vkd3d_mutex_destroy(&device->worker_mutex);
    vkd3d_cond_destroy(&device->worker_cond);

    return S_OK;
}
static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device5 *iface)
{
struct d3d12_device *device = impl_from_ID3D12Device5(iface);
@ -2520,6 +2542,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device5 *iface)
d3d12_device_destroy_vkd3d_queues(device);
vkd3d_desc_object_cache_cleanup(&device->view_desc_cache);
vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache);
if (device->use_vk_heaps)
device_worker_stop(device);
vkd3d_free(device->heaps);
VK_CALL(vkDestroyDevice(device->vk_device, NULL));
if (device->parent)
IUnknown_Release(device->parent);
@ -4251,6 +4276,40 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface)
return impl_from_ID3D12Device5(iface);
}
/* Entry point of the device worker thread; arg is the struct d3d12_device.
 *
 * While worker_should_exit is false, each pass walks device->heaps and
 * flushes every heap whose dirty list is not empty, then blocks on
 * worker_cond until signalled again. worker_mutex is taken before the loop
 * and released after it. Always returns NULL. */
static void *device_worker_main(void *arg)
{
    struct d3d12_device *device = arg;
    struct d3d12_descriptor_heap *dirty_heap;
    size_t heap_idx;

    vkd3d_set_thread_name("device_worker");

    vkd3d_mutex_lock(&device->worker_mutex);

    while (!device->worker_should_exit)
    {
        /* The client may write d3d12 descriptors at any time and from any
         * thread, but they only reach the Vulkan descriptor sets when a
         * command list is submitted to a queue, which stalls right before
         * execution. Writing them here in the background instead can speed
         * up execution significantly. */
        for (heap_idx = 0; heap_idx < device->heap_count; ++heap_idx)
        {
            dirty_heap = device->heaps[heap_idx];

            /* UINT_MAX as the dirty list head means nothing is pending. */
            if (dirty_heap->dirty_list_head != UINT_MAX)
            {
                vkd3d_mutex_lock(&dirty_heap->vk_sets_mutex);
                d3d12_desc_flush_vk_heap_updates_locked(dirty_heap, device);
                vkd3d_mutex_unlock(&dirty_heap->vk_sets_mutex);
            }
        }

        /* Sleep until signalled; the exit flag is re-checked on wake-up. */
        vkd3d_cond_wait(&device->worker_cond, &device->worker_mutex);
    }

    vkd3d_mutex_unlock(&device->worker_mutex);

    return NULL;
}
static HRESULT d3d12_device_init(struct d3d12_device *device,
struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info)
{
@ -4270,6 +4329,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
device->vk_device = VK_NULL_HANDLE;
device->heaps = NULL;
device->heap_capacity = 0;
device->heap_count = 0;
memset(&device->worker_thread, 0, sizeof(device->worker_thread));
device->worker_should_exit = false;
vkd3d_mutex_init(&device->worker_mutex);
vkd3d_cond_init(&device->worker_cond);
if (FAILED(hr = vkd3d_create_vk_device(device, create_info)))
goto out_free_instance;
@ -4291,6 +4358,13 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device)))
goto out_cleanup_uav_clear_state;
if (device->use_vk_heaps && FAILED(hr = vkd3d_create_thread(device->vkd3d_instance,
device_worker_main, device, &device->worker_thread)))
{
WARN("Failed to create worker thread, hr %#x.\n", hr);
goto out_cleanup_descriptor_heap_layouts;
}
vkd3d_render_pass_cache_init(&device->render_pass_cache);
vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
vkd3d_time_domains_init(device);
@ -4308,6 +4382,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
return S_OK;
out_cleanup_descriptor_heap_layouts:
vkd3d_vk_descriptor_heap_layouts_cleanup(device);
out_cleanup_uav_clear_state:
vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device);
out_destroy_null_resources:
@ -4361,6 +4437,40 @@ void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason,
device->removed_reason = reason;
}
/* Append a descriptor heap to the device's heap array, which the device
 * worker thread iterates over.
 *
 * The array is grown as needed while worker_mutex is held. Returns S_OK on
 * success, or E_OUTOFMEMORY if the array could not be enlarged, in which
 * case the heap is not added. */
HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap)
{
    HRESULT hr = E_OUTOFMEMORY;

    vkd3d_mutex_lock(&device->worker_mutex);

    if (vkd3d_array_reserve((void **)&device->heaps, &device->heap_capacity,
            device->heap_count + 1, sizeof(*device->heaps)))
    {
        device->heaps[device->heap_count] = heap;
        ++device->heap_count;
        hr = S_OK;
    }

    vkd3d_mutex_unlock(&device->worker_mutex);

    return hr;
}
/* Remove a descriptor heap from the device's heap array.
 *
 * The first matching entry is overwritten with the last entry and the count
 * is decremented, so the order of the remaining heaps is not preserved.
 * Nothing happens if the heap is not in the array. The search and removal
 * are performed with worker_mutex held. */
void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap)
{
    size_t pos;

    vkd3d_mutex_lock(&device->worker_mutex);

    /* Find the heap's slot; pos == heap_count means it is not present. */
    for (pos = 0; pos < device->heap_count; ++pos)
    {
        if (device->heaps[pos] == heap)
            break;
    }

    if (pos < device->heap_count)
    {
        /* Fill the vacated slot with the last element. */
        --device->heap_count;
        device->heaps[pos] = device->heaps[device->heap_count];
    }

    vkd3d_mutex_unlock(&device->worker_mutex);
}
#ifdef _WIN32
struct thread_data

View File

@ -3995,6 +3995,9 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
{
struct d3d12_desc *descriptors = (struct d3d12_desc *)heap->descriptors;
if (heap->use_vk_heaps)
d3d12_device_remove_descriptor_heap(device, heap);
for (i = 0; i < heap->desc.NumDescriptors; ++i)
{
d3d12_desc_destroy(&descriptors[i], device);
@ -4318,6 +4321,12 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
dst[i].next = 0;
}
object->dirty_list_head = UINT_MAX;
if (object->use_vk_heaps && FAILED(hr = d3d12_device_add_descriptor_heap(device, object)))
{
vkd3d_free(object);
return hr;
}
}
else
{

View File

@ -1804,6 +1804,14 @@ struct d3d12_device
unsigned int vk_pool_count;
struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
bool use_vk_heaps;
struct d3d12_descriptor_heap **heaps;
size_t heap_capacity;
size_t heap_count;
union vkd3d_thread_handle worker_thread;
struct vkd3d_mutex worker_mutex;
struct vkd3d_cond worker_cond;
bool worker_should_exit;
};
HRESULT d3d12_device_create(struct vkd3d_instance *instance,
@ -1813,6 +1821,8 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent);
void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason,
const char *message, ...) VKD3D_PRINTF_FUNC(3, 4);
struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface);
HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap);
void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap);
static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object)
{