mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2025-01-28 13:05:02 -08:00
vkd3d: Write Vulkan descriptors in a worker thread.
Raises framerate by 5-10% in games which write thousands of descriptors per frame, e.g. Horizon Zero Dawn. The worker thread is a generic device worker which can also be used for other purposes if the need arises.
This commit is contained in:
parent
70962ae7d8
commit
37e76618ca
Notes:
Alexandre Julliard
2023-12-14 23:31:17 +01:00
Approved-by: Giovanni Mascellani (@giomasce) Approved-by: Henri Verbeet (@hverbeet) Approved-by: Alexandre Julliard (@julliard) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/292
@ -2644,6 +2644,8 @@ static bool d3d12_command_list_update_compute_pipeline(struct d3d12_command_list
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs;
|
||||
|
||||
vkd3d_cond_signal(&list->device->worker_cond);
|
||||
|
||||
if (list->current_pipeline != VK_NULL_HANDLE)
|
||||
return true;
|
||||
|
||||
@ -2665,6 +2667,8 @@ static bool d3d12_command_list_update_graphics_pipeline(struct d3d12_command_lis
|
||||
VkRenderPass vk_render_pass;
|
||||
VkPipeline vk_pipeline;
|
||||
|
||||
vkd3d_cond_signal(&list->device->worker_cond);
|
||||
|
||||
if (list->current_pipeline != VK_NULL_HANDLE)
|
||||
return true;
|
||||
|
||||
|
@ -2495,6 +2495,28 @@ static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device5 *iface)
|
||||
return refcount;
|
||||
}
|
||||
|
||||
static HRESULT device_worker_stop(struct d3d12_device *device)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
TRACE("device %p.\n", device);
|
||||
|
||||
vkd3d_mutex_lock(&device->worker_mutex);
|
||||
|
||||
device->worker_should_exit = true;
|
||||
vkd3d_cond_signal(&device->worker_cond);
|
||||
|
||||
vkd3d_mutex_unlock(&device->worker_mutex);
|
||||
|
||||
if (FAILED(hr = vkd3d_join_thread(device->vkd3d_instance, &device->worker_thread)))
|
||||
return hr;
|
||||
|
||||
vkd3d_mutex_destroy(&device->worker_mutex);
|
||||
vkd3d_cond_destroy(&device->worker_cond);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device5 *iface)
|
||||
{
|
||||
struct d3d12_device *device = impl_from_ID3D12Device5(iface);
|
||||
@ -2520,6 +2542,9 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device5 *iface)
|
||||
d3d12_device_destroy_vkd3d_queues(device);
|
||||
vkd3d_desc_object_cache_cleanup(&device->view_desc_cache);
|
||||
vkd3d_desc_object_cache_cleanup(&device->cbuffer_desc_cache);
|
||||
if (device->use_vk_heaps)
|
||||
device_worker_stop(device);
|
||||
vkd3d_free(device->heaps);
|
||||
VK_CALL(vkDestroyDevice(device->vk_device, NULL));
|
||||
if (device->parent)
|
||||
IUnknown_Release(device->parent);
|
||||
@ -4251,6 +4276,40 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface)
|
||||
return impl_from_ID3D12Device5(iface);
|
||||
}
|
||||
|
||||
static void *device_worker_main(void *arg)
|
||||
{
|
||||
struct d3d12_descriptor_heap *heap;
|
||||
struct d3d12_device *device = arg;
|
||||
size_t i;
|
||||
|
||||
vkd3d_set_thread_name("device_worker");
|
||||
|
||||
vkd3d_mutex_lock(&device->worker_mutex);
|
||||
|
||||
while (!device->worker_should_exit)
|
||||
{
|
||||
for (i = 0; i < device->heap_count; ++i)
|
||||
{
|
||||
/* Descriptor updates are not written to Vulkan descriptor sets until a command list
|
||||
* is submitted to a queue, while the client is free to write d3d12 descriptors earlier,
|
||||
* from any thread. This causes a delay right before command list execution, so
|
||||
* handling these updates in a worker thread can speed up execution significantly. */
|
||||
heap = device->heaps[i];
|
||||
if (heap->dirty_list_head == UINT_MAX)
|
||||
continue;
|
||||
vkd3d_mutex_lock(&heap->vk_sets_mutex);
|
||||
d3d12_desc_flush_vk_heap_updates_locked(heap, device);
|
||||
vkd3d_mutex_unlock(&heap->vk_sets_mutex);
|
||||
}
|
||||
|
||||
vkd3d_cond_wait(&device->worker_cond, &device->worker_mutex);
|
||||
}
|
||||
|
||||
vkd3d_mutex_unlock(&device->worker_mutex);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_device_init(struct d3d12_device *device,
|
||||
struct vkd3d_instance *instance, const struct vkd3d_device_create_info *create_info)
|
||||
{
|
||||
@ -4270,6 +4329,14 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
|
||||
|
||||
device->vk_device = VK_NULL_HANDLE;
|
||||
|
||||
device->heaps = NULL;
|
||||
device->heap_capacity = 0;
|
||||
device->heap_count = 0;
|
||||
memset(&device->worker_thread, 0, sizeof(device->worker_thread));
|
||||
device->worker_should_exit = false;
|
||||
vkd3d_mutex_init(&device->worker_mutex);
|
||||
vkd3d_cond_init(&device->worker_cond);
|
||||
|
||||
if (FAILED(hr = vkd3d_create_vk_device(device, create_info)))
|
||||
goto out_free_instance;
|
||||
|
||||
@ -4291,6 +4358,13 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
|
||||
if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device)))
|
||||
goto out_cleanup_uav_clear_state;
|
||||
|
||||
if (device->use_vk_heaps && FAILED(hr = vkd3d_create_thread(device->vkd3d_instance,
|
||||
device_worker_main, device, &device->worker_thread)))
|
||||
{
|
||||
WARN("Failed to create worker thread, hr %#x.\n", hr);
|
||||
goto out_cleanup_descriptor_heap_layouts;
|
||||
}
|
||||
|
||||
vkd3d_render_pass_cache_init(&device->render_pass_cache);
|
||||
vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
|
||||
vkd3d_time_domains_init(device);
|
||||
@ -4308,6 +4382,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
|
||||
|
||||
return S_OK;
|
||||
|
||||
out_cleanup_descriptor_heap_layouts:
|
||||
vkd3d_vk_descriptor_heap_layouts_cleanup(device);
|
||||
out_cleanup_uav_clear_state:
|
||||
vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device);
|
||||
out_destroy_null_resources:
|
||||
@ -4361,6 +4437,40 @@ void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason,
|
||||
device->removed_reason = reason;
|
||||
}
|
||||
|
||||
HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap)
|
||||
{
|
||||
vkd3d_mutex_lock(&device->worker_mutex);
|
||||
|
||||
if (!vkd3d_array_reserve((void **)&device->heaps, &device->heap_capacity, device->heap_count + 1,
|
||||
sizeof(*device->heaps)))
|
||||
{
|
||||
vkd3d_mutex_unlock(&device->worker_mutex);
|
||||
return E_OUTOFMEMORY;
|
||||
}
|
||||
device->heaps[device->heap_count++] = heap;
|
||||
|
||||
vkd3d_mutex_unlock(&device->worker_mutex);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
vkd3d_mutex_lock(&device->worker_mutex);
|
||||
|
||||
for (i = 0; i < device->heap_count; ++i)
|
||||
{
|
||||
if (device->heaps[i] == heap)
|
||||
{
|
||||
device->heaps[i] = device->heaps[--device->heap_count];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
vkd3d_mutex_unlock(&device->worker_mutex);
|
||||
}
|
||||
|
||||
#ifdef _WIN32
|
||||
struct thread_data
|
||||
|
@ -3995,6 +3995,9 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea
|
||||
{
|
||||
struct d3d12_desc *descriptors = (struct d3d12_desc *)heap->descriptors;
|
||||
|
||||
if (heap->use_vk_heaps)
|
||||
d3d12_device_remove_descriptor_heap(device, heap);
|
||||
|
||||
for (i = 0; i < heap->desc.NumDescriptors; ++i)
|
||||
{
|
||||
d3d12_desc_destroy(&descriptors[i], device);
|
||||
@ -4318,6 +4321,12 @@ HRESULT d3d12_descriptor_heap_create(struct d3d12_device *device,
|
||||
dst[i].next = 0;
|
||||
}
|
||||
object->dirty_list_head = UINT_MAX;
|
||||
|
||||
if (object->use_vk_heaps && FAILED(hr = d3d12_device_add_descriptor_heap(device, object)))
|
||||
{
|
||||
vkd3d_free(object);
|
||||
return hr;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1804,6 +1804,14 @@ struct d3d12_device
|
||||
unsigned int vk_pool_count;
|
||||
struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
|
||||
bool use_vk_heaps;
|
||||
|
||||
struct d3d12_descriptor_heap **heaps;
|
||||
size_t heap_capacity;
|
||||
size_t heap_count;
|
||||
union vkd3d_thread_handle worker_thread;
|
||||
struct vkd3d_mutex worker_mutex;
|
||||
struct vkd3d_cond worker_cond;
|
||||
bool worker_should_exit;
|
||||
};
|
||||
|
||||
HRESULT d3d12_device_create(struct vkd3d_instance *instance,
|
||||
@ -1813,6 +1821,8 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent);
|
||||
void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason,
|
||||
const char *message, ...) VKD3D_PRINTF_FUNC(3, 4);
|
||||
struct d3d12_device *unsafe_impl_from_ID3D12Device5(ID3D12Device5 *iface);
|
||||
HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap);
|
||||
void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap);
|
||||
|
||||
static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object)
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user