mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2024-11-21 16:46:41 -08:00
vkd3d: Use Vulkan timeline semaphores for D3D12 fences.
D3D12 supports signalling a fence to a lower value, while Vulkan timeline semaphores do not. On the GPU side this is handled by simply submitting the signal anyway, if a test for this passes on device creation, because working around this is impractical. For CPU signals the Vulkan semaphore is replaced with a new one at the lower value only if no waits and/or signals are pending on the GPU. Otherwise, a fixme is emitted. Partly based on a vkd3d-proton patch by Hans-Kristian Arntzen (not including the handling of lower fence values). The old implementation is used if KHR_timeline_semaphore is not available or GPU signals do not work for a lower value. Signed-off-by: Conor McCarthy <cmccarthy@codeweavers.com> Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com> Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
parent
e3045090a8
commit
22d8665300
@ -46,6 +46,9 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
|
||||
object->vk_queue_flags = properties->queueFlags;
|
||||
object->timestamp_bits = properties->timestampValidBits;
|
||||
|
||||
object->wait_completion_semaphore = VK_NULL_HANDLE;
|
||||
object->pending_wait_completion_value = 0;
|
||||
|
||||
object->semaphores = NULL;
|
||||
object->semaphores_size = 0;
|
||||
object->semaphore_count = 0;
|
||||
@ -61,6 +64,20 @@ HRESULT vkd3d_queue_create(struct d3d12_device *device,
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device)
|
||||
{
|
||||
VkResult vr;
|
||||
|
||||
if (!queue->wait_completion_semaphore
|
||||
&& (vr = vkd3d_create_timeline_semaphore(device, 0, &queue->wait_completion_semaphore)) < 0)
|
||||
{
|
||||
WARN("Failed to create timeline semaphore, vr %d.\n", vr);
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
@ -75,6 +92,8 @@ void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device)
|
||||
|
||||
vkd3d_free(queue->semaphores);
|
||||
|
||||
VK_CALL(vkDestroySemaphore(device->vk_device, queue->wait_completion_semaphore, NULL));
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(queue->old_vk_semaphores); ++i)
|
||||
{
|
||||
if (queue->old_vk_semaphores[i])
|
||||
@ -268,6 +287,7 @@ static HRESULT vkd3d_enqueue_gpu_fence(struct vkd3d_fence_worker *worker,
|
||||
}
|
||||
|
||||
worker->enqueued_fences[worker->enqueued_fence_count].vk_fence = vk_fence;
|
||||
worker->enqueued_fences[worker->enqueued_fence_count].vk_semaphore = VK_NULL_HANDLE;
|
||||
waiting_fence = &worker->enqueued_fences[worker->enqueued_fence_count].waiting_fence;
|
||||
waiting_fence->fence = fence;
|
||||
waiting_fence->value = value;
|
||||
@ -317,6 +337,7 @@ static void vkd3d_fence_worker_remove_fence(struct vkd3d_fence_worker *worker, s
|
||||
static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_worker *worker)
|
||||
{
|
||||
unsigned int i;
|
||||
bool timeline;
|
||||
size_t count;
|
||||
bool ret;
|
||||
|
||||
@ -325,8 +346,18 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
|
||||
|
||||
count = worker->fence_count + worker->enqueued_fence_count;
|
||||
|
||||
ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
|
||||
count, sizeof(*worker->vk_fences));
|
||||
if ((timeline = worker->device->use_timeline_semaphores))
|
||||
{
|
||||
ret = vkd3d_array_reserve((void **) &worker->vk_semaphores, &worker->vk_semaphores_size,
|
||||
count, sizeof(*worker->vk_semaphores));
|
||||
ret &= vkd3d_array_reserve((void **) &worker->semaphore_wait_values, &worker->semaphore_wait_values_size,
|
||||
count, sizeof(*worker->semaphore_wait_values));
|
||||
}
|
||||
else
|
||||
{
|
||||
ret = vkd3d_array_reserve((void **)&worker->vk_fences, &worker->vk_fences_size,
|
||||
count, sizeof(*worker->vk_fences));
|
||||
}
|
||||
ret &= vkd3d_array_reserve((void **)&worker->fences, &worker->fences_size,
|
||||
count, sizeof(*worker->fences));
|
||||
if (!ret)
|
||||
@ -339,7 +370,16 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
|
||||
{
|
||||
struct vkd3d_enqueued_fence *current = &worker->enqueued_fences[i];
|
||||
|
||||
worker->vk_fences[worker->fence_count] = current->vk_fence;
|
||||
if (timeline)
|
||||
{
|
||||
worker->vk_semaphores[worker->fence_count] = current->vk_semaphore;
|
||||
worker->semaphore_wait_values[worker->fence_count] = current->waiting_fence.value;
|
||||
}
|
||||
else
|
||||
{
|
||||
worker->vk_fences[worker->fence_count] = current->vk_fence;
|
||||
}
|
||||
|
||||
worker->fences[worker->fence_count] = current->waiting_fence;
|
||||
++worker->fence_count;
|
||||
}
|
||||
@ -347,6 +387,66 @@ static void vkd3d_fence_worker_move_enqueued_fences_locked(struct vkd3d_fence_wo
|
||||
worker->enqueued_fence_count = 0;
|
||||
}
|
||||
|
||||
static void vkd3d_wait_for_gpu_timeline_semaphores(struct vkd3d_fence_worker *worker)
|
||||
{
|
||||
const struct d3d12_device *device = worker->device;
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkSemaphoreWaitInfoKHR wait_info;
|
||||
VkSemaphore vk_semaphore;
|
||||
uint64_t counter_value;
|
||||
unsigned int i, j;
|
||||
HRESULT hr;
|
||||
int vr;
|
||||
|
||||
if (!worker->fence_count)
|
||||
return;
|
||||
|
||||
wait_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO_KHR;
|
||||
wait_info.pNext = NULL;
|
||||
wait_info.flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR;
|
||||
wait_info.pSemaphores = worker->vk_semaphores;
|
||||
wait_info.semaphoreCount = worker->fence_count;
|
||||
wait_info.pValues = worker->semaphore_wait_values;
|
||||
|
||||
vr = VK_CALL(vkWaitSemaphoresKHR(device->vk_device, &wait_info, ~(uint64_t)0));
|
||||
if (vr == VK_TIMEOUT)
|
||||
return;
|
||||
if (vr != VK_SUCCESS)
|
||||
{
|
||||
ERR("Failed to wait for Vulkan timeline semaphores, vr %d.\n", vr);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0, j = 0; i < worker->fence_count; ++i)
|
||||
{
|
||||
struct vkd3d_waiting_fence *current = &worker->fences[i];
|
||||
|
||||
vk_semaphore = worker->vk_semaphores[i];
|
||||
if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, vk_semaphore, &counter_value))) < 0)
|
||||
{
|
||||
ERR("Failed to get Vulkan semaphore value, vr %d.\n", vr);
|
||||
}
|
||||
else if (counter_value >= current->value)
|
||||
{
|
||||
TRACE("Signaling fence %p value %#"PRIx64".\n", current->fence, current->value);
|
||||
if (FAILED(hr = d3d12_fence_signal(current->fence, counter_value, VK_NULL_HANDLE)))
|
||||
ERR("Failed to signal D3D12 fence, hr %#x.\n", hr);
|
||||
|
||||
InterlockedDecrement(¤t->fence->pending_worker_operation_count);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (i != j)
|
||||
{
|
||||
worker->vk_semaphores[j] = worker->vk_semaphores[i];
|
||||
worker->semaphore_wait_values[j] = worker->semaphore_wait_values[i];
|
||||
worker->fences[j] = worker->fences[i];
|
||||
}
|
||||
++j;
|
||||
}
|
||||
worker->fence_count = j;
|
||||
}
|
||||
|
||||
static void vkd3d_wait_for_gpu_fences(struct vkd3d_fence_worker *worker)
|
||||
{
|
||||
struct d3d12_device *device = worker->device;
|
||||
@ -408,7 +508,7 @@ static void *vkd3d_fence_worker_main(void *arg)
|
||||
|
||||
for (;;)
|
||||
{
|
||||
vkd3d_wait_for_gpu_fences(worker);
|
||||
worker->wait_for_gpu_fences(worker);
|
||||
|
||||
if (!worker->fence_count || InterlockedAdd(&worker->enqueued_fence_count, 0))
|
||||
{
|
||||
@ -473,6 +573,13 @@ HRESULT vkd3d_fence_worker_start(struct vkd3d_fence_worker *worker,
|
||||
worker->vk_fences_size = 0;
|
||||
worker->fences = NULL;
|
||||
worker->fences_size = 0;
|
||||
worker->vk_semaphores = NULL;
|
||||
worker->vk_semaphores_size = 0;
|
||||
worker->semaphore_wait_values = NULL;
|
||||
worker->semaphore_wait_values_size = 0;
|
||||
|
||||
worker->wait_for_gpu_fences = device->use_timeline_semaphores
|
||||
? vkd3d_wait_for_gpu_timeline_semaphores : vkd3d_wait_for_gpu_fences;
|
||||
|
||||
if ((rc = vkd3d_mutex_init(&worker->mutex)))
|
||||
{
|
||||
@ -535,6 +642,8 @@ HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker,
|
||||
vkd3d_free(worker->enqueued_fences);
|
||||
vkd3d_free(worker->vk_fences);
|
||||
vkd3d_free(worker->fences);
|
||||
vkd3d_free(worker->vk_semaphores);
|
||||
vkd3d_free(worker->semaphore_wait_values);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
@ -684,6 +793,7 @@ static void d3d12_fence_destroy_vk_objects(struct d3d12_fence *fence)
|
||||
}
|
||||
|
||||
d3d12_fence_garbage_collect_vk_semaphores_locked(fence, true);
|
||||
VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
|
||||
|
||||
vkd3d_mutex_unlock(&fence->mutex);
|
||||
}
|
||||
@ -802,31 +912,21 @@ static HRESULT d3d12_fence_add_vk_semaphore(struct d3d12_fence *fence,
|
||||
return hr;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
|
||||
static bool d3d12_fence_signal_external_events_locked(struct d3d12_fence *fence)
|
||||
{
|
||||
struct d3d12_device *device = fence->device;
|
||||
struct vkd3d_signaled_semaphore *current;
|
||||
bool signal_null_event_cond = false;
|
||||
unsigned int i, j;
|
||||
int rc;
|
||||
|
||||
if ((rc = vkd3d_mutex_lock(&fence->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
fence->value = value;
|
||||
|
||||
for (i = 0, j = 0; i < fence->event_count; ++i)
|
||||
{
|
||||
struct vkd3d_waiting_event *current = &fence->events[i];
|
||||
|
||||
if (current->value <= value)
|
||||
if (current->value <= fence->value)
|
||||
{
|
||||
if (current->event)
|
||||
{
|
||||
fence->device->signal_event(current->event);
|
||||
device->signal_event(current->event);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -841,9 +941,28 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF
|
||||
++j;
|
||||
}
|
||||
}
|
||||
|
||||
fence->event_count = j;
|
||||
|
||||
if (signal_null_event_cond)
|
||||
return signal_null_event_cond;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkFence vk_fence)
|
||||
{
|
||||
struct d3d12_device *device = fence->device;
|
||||
struct vkd3d_signaled_semaphore *current;
|
||||
unsigned int i;
|
||||
int rc;
|
||||
|
||||
if ((rc = vkd3d_mutex_lock(&fence->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
fence->value = value;
|
||||
|
||||
if (d3d12_fence_signal_external_events_locked(fence))
|
||||
vkd3d_cond_broadcast(&fence->null_event_cond);
|
||||
|
||||
if (vk_fence)
|
||||
@ -1069,12 +1188,160 @@ static HRESULT STDMETHODCALLTYPE d3d12_fence_SetEventOnCompletion(ID3D12Fence *i
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
static inline bool d3d12_fence_gpu_wait_is_completed(const struct d3d12_fence *fence, unsigned int i)
|
||||
{
|
||||
const struct d3d12_device *device = fence->device;
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
uint64_t value;
|
||||
VkResult vr;
|
||||
|
||||
vk_procs = &device->vk_procs;
|
||||
|
||||
if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device,
|
||||
fence->gpu_waits[i].queue->wait_completion_semaphore, &value))) >= 0)
|
||||
{
|
||||
return value >= fence->gpu_waits[i].pending_value;
|
||||
}
|
||||
|
||||
ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool d3d12_fence_has_pending_gpu_ops_locked(struct d3d12_fence *fence)
|
||||
{
|
||||
const struct d3d12_device *device = fence->device;
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
uint64_t value;
|
||||
unsigned int i;
|
||||
VkResult vr;
|
||||
|
||||
for (i = 0; i < fence->gpu_wait_count; ++i)
|
||||
{
|
||||
if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count)
|
||||
fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count];
|
||||
}
|
||||
if (fence->gpu_wait_count)
|
||||
return true;
|
||||
|
||||
/* Check for pending signals too. */
|
||||
if (fence->value >= fence->pending_timeline_value)
|
||||
return false;
|
||||
|
||||
vk_procs = &device->vk_procs;
|
||||
|
||||
/* Check the actual semaphore value in case fence->value update is lagging. */
|
||||
if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, fence->timeline_semaphore, &value))) < 0)
|
||||
{
|
||||
ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
|
||||
return false;
|
||||
}
|
||||
|
||||
return value < fence->pending_timeline_value;
|
||||
}
|
||||
|
||||
/* Replace the VkSemaphore with a new one to allow a lower value to be set. Ideally apps will
|
||||
* only use this to reset the fence when no operations are pending on the queue. */
|
||||
static HRESULT d3d12_fence_reinit_timeline_semaphore_locked(struct d3d12_fence *fence, uint64_t value)
|
||||
{
|
||||
const struct d3d12_device *device = fence->device;
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
VkSemaphore timeline_semaphore;
|
||||
VkResult vr;
|
||||
|
||||
if (d3d12_fence_has_pending_gpu_ops_locked(fence))
|
||||
{
|
||||
/* This situation is not very likely because it means a fence with pending waits and/or signals was
|
||||
* signalled on the CPU to a lower value. For now, emit a fixme so it can be patched if necessary.
|
||||
* A patch already exists for this but it's not pretty. */
|
||||
FIXME("Unable to re-initialise timeline semaphore to a lower value due to pending GPU ops.\n");
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
if ((vr = vkd3d_create_timeline_semaphore(device, value, &timeline_semaphore)) < 0)
|
||||
{
|
||||
WARN("Failed to create timeline semaphore, vr %d.\n", vr);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
fence->value = value;
|
||||
fence->pending_timeline_value = value;
|
||||
|
||||
WARN("Replacing timeline semaphore with a new object.\n");
|
||||
|
||||
vk_procs = &device->vk_procs;
|
||||
|
||||
VK_CALL(vkDestroySemaphore(device->vk_device, fence->timeline_semaphore, NULL));
|
||||
fence->timeline_semaphore = timeline_semaphore;
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
static HRESULT d3d12_fence_signal_cpu_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
|
||||
{
|
||||
const struct d3d12_device *device = fence->device;
|
||||
VkSemaphoreSignalInfoKHR info;
|
||||
HRESULT hr = S_OK;
|
||||
VkResult vr;
|
||||
int rc;
|
||||
|
||||
if ((rc = vkd3d_mutex_lock(&fence->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
/* We must only signal a value which is greater than the current value.
|
||||
* That value can be in the range of current known value (fence->value), or as large as pending_timeline_value.
|
||||
* Pending timeline value signal might be blocked by another synchronization primitive, and thus statically
|
||||
* cannot be that value, so the safest thing to do is to check the current value which is updated by the fence
|
||||
* wait thread continuously. This check is technically racy since the value might be immediately out of date,
|
||||
* but there is no way to avoid this. */
|
||||
if (value > fence->value)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
|
||||
/* Sanity check against the delta limit. */
|
||||
if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
|
||||
{
|
||||
FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
|
||||
value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
|
||||
}
|
||||
|
||||
info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO_KHR;
|
||||
info.pNext = NULL;
|
||||
info.semaphore = fence->timeline_semaphore;
|
||||
info.value = value;
|
||||
if ((vr = VK_CALL(vkSignalSemaphoreKHR(device->vk_device, &info))) >= 0)
|
||||
{
|
||||
fence->value = value;
|
||||
if (value > fence->pending_timeline_value)
|
||||
fence->pending_timeline_value = value;
|
||||
}
|
||||
else
|
||||
{
|
||||
ERR("Failed to signal timeline semaphore, vr %d.\n", vr);
|
||||
hr = hresult_from_vk_result(vr);
|
||||
}
|
||||
}
|
||||
else if (value < fence->value)
|
||||
{
|
||||
hr = d3d12_fence_reinit_timeline_semaphore_locked(fence, value);
|
||||
}
|
||||
|
||||
d3d12_fence_signal_external_events_locked(fence);
|
||||
|
||||
vkd3d_mutex_unlock(&fence->mutex);
|
||||
return hr;
|
||||
}
|
||||
|
||||
/* ID3D12Fence::Signal implementation: set the fence to "value" from the CPU.
 * Fences that own a timeline semaphore take the timeline path; all others use
 * the generic signal path with no Vulkan fence attached. */
static HRESULT STDMETHODCALLTYPE d3d12_fence_Signal(ID3D12Fence *iface, UINT64 value)
{
    struct d3d12_fence *fence = impl_from_ID3D12Fence(iface);

    TRACE("iface %p, value %#"PRIx64".\n", iface, value);

    if (!fence->timeline_semaphore)
        return d3d12_fence_signal(fence, value, VK_NULL_HANDLE);

    return d3d12_fence_signal_cpu_timeline_semaphore(fence, value);
}
|
||||
|
||||
@ -1108,6 +1375,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface)
|
||||
static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *device,
|
||||
UINT64 initial_value, D3D12_FENCE_FLAGS flags)
|
||||
{
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
int rc;
|
||||
|
||||
@ -1136,6 +1404,16 @@ static HRESULT d3d12_fence_init(struct d3d12_fence *fence, struct d3d12_device *
|
||||
fence->events_size = 0;
|
||||
fence->event_count = 0;
|
||||
|
||||
fence->timeline_semaphore = VK_NULL_HANDLE;
|
||||
if (device->use_timeline_semaphores && (vr = vkd3d_create_timeline_semaphore(device, initial_value,
|
||||
&fence->timeline_semaphore)) < 0)
|
||||
{
|
||||
WARN("Failed to create timeline semaphore, vr %d.\n", vr);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
fence->pending_timeline_value = initial_value;
|
||||
fence->gpu_wait_count = 0;
|
||||
|
||||
list_init(&fence->semaphores);
|
||||
fence->semaphore_count = 0;
|
||||
|
||||
@ -1172,6 +1450,25 @@ HRESULT d3d12_fence_create(struct d3d12_device *device,
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value,
|
||||
VkSemaphore *timeline_semaphore)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkSemaphoreTypeCreateInfoKHR type_info;
|
||||
VkSemaphoreCreateInfo info;
|
||||
|
||||
info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
|
||||
info.pNext = &type_info;
|
||||
info.flags = 0;
|
||||
|
||||
type_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO_KHR;
|
||||
type_info.pNext = NULL;
|
||||
type_info.semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE_KHR;
|
||||
type_info.initialValue = initial_value;
|
||||
|
||||
return VK_CALL(vkCreateSemaphore(device->vk_device, &info, NULL, timeline_semaphore));
|
||||
}
|
||||
|
||||
/* Command buffers */
|
||||
static void d3d12_command_list_mark_as_invalid(struct d3d12_command_list *list,
|
||||
const char *message, ...)
|
||||
@ -6138,18 +6435,88 @@ static void STDMETHODCALLTYPE d3d12_command_queue_EndEvent(ID3D12CommandQueue *i
|
||||
FIXME("iface %p stub!\n", iface);
|
||||
}
|
||||
|
||||
static HRESULT d3d12_fence_update_gpu_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t value)
|
||||
{
|
||||
const struct d3d12_device *device = fence->device;
|
||||
int rc;
|
||||
|
||||
if ((rc = vkd3d_mutex_lock(&fence->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return hresult_from_errno(rc);
|
||||
}
|
||||
|
||||
/* If we're attempting to async signal a fence with a value which is not strictly increasing the payload value,
|
||||
* warn about this case. Do not treat this as an error since it works at least with RADV and Nvidia drivers and
|
||||
* there's no workaround on the GPU side. */
|
||||
if (value <= fence->pending_timeline_value)
|
||||
{
|
||||
WARN("Fence %p values are not strictly increasing. Pending values: old %"PRIu64", new %"PRIu64".\n",
|
||||
fence, fence->pending_timeline_value, value);
|
||||
}
|
||||
/* Sanity check against the delta limit. Use the current fence value. */
|
||||
else if (value - fence->value > device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference)
|
||||
{
|
||||
FIXME("Timeline semaphore delta is %"PRIu64", but implementation only supports a delta of %"PRIu64".\n",
|
||||
value - fence->value, device->vk_info.timeline_semaphore_properties.maxTimelineSemaphoreValueDifference);
|
||||
}
|
||||
fence->pending_timeline_value = value;
|
||||
|
||||
vkd3d_mutex_unlock(&fence->mutex);
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
/* Append a (semaphore, fence, value, queue) entry to the fence worker's
 * enqueued list, so the worker will wait for vk_semaphore to reach "value" on
 * behalf of the fence, and notify the worker through its condition variable.
 *
 * The fence's pending_worker_operation_count is incremented for the new
 * entry.
 *
 * Returns S_OK, E_OUTOFMEMORY if the list cannot grow, or an error HRESULT if
 * the worker mutex cannot be locked. */
static HRESULT vkd3d_enqueue_timeline_semaphore(struct vkd3d_fence_worker *worker, VkSemaphore vk_semaphore,
        struct d3d12_fence *fence, uint64_t value, struct vkd3d_queue *queue)
{
    struct vkd3d_enqueued_fence *entry;
    int rc;

    TRACE("worker %p, fence %p, value %#"PRIx64".\n", worker, fence, value);

    rc = vkd3d_mutex_lock(&worker->mutex);
    if (rc)
    {
        ERR("Failed to lock mutex, error %d.\n", rc);
        return hresult_from_errno(rc);
    }

    if (!vkd3d_array_reserve((void **)&worker->enqueued_fences, &worker->enqueued_fences_size,
            worker->enqueued_fence_count + 1, sizeof(*worker->enqueued_fences)))
    {
        ERR("Failed to add GPU timeline semaphore.\n");
        vkd3d_mutex_unlock(&worker->mutex);
        return E_OUTOFMEMORY;
    }

    /* Fill in the first free slot, then publish it by bumping the count. */
    entry = &worker->enqueued_fences[worker->enqueued_fence_count];
    entry->vk_semaphore = vk_semaphore;
    entry->waiting_fence.fence = fence;
    entry->waiting_fence.value = value;
    entry->waiting_fence.queue = queue;
    ++worker->enqueued_fence_count;

    InterlockedIncrement(&fence->pending_worker_operation_count);

    vkd3d_cond_signal(&worker->cond);
    vkd3d_mutex_unlock(&worker->mutex);

    return S_OK;
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *iface,
|
||||
ID3D12Fence *fence_iface, UINT64 value)
|
||||
{
|
||||
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
|
||||
VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
VkSemaphore vk_semaphore = VK_NULL_HANDLE;
|
||||
VkFence vk_fence = VK_NULL_HANDLE;
|
||||
struct vkd3d_queue *vkd3d_queue;
|
||||
uint64_t sequence_number = 0;
|
||||
struct d3d12_device *device;
|
||||
struct d3d12_fence *fence;
|
||||
VkSubmitInfo submit_info;
|
||||
uint64_t sequence_number;
|
||||
VkQueue vk_queue;
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
@ -6162,10 +6529,21 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
|
||||
|
||||
fence = unsafe_impl_from_ID3D12Fence(fence_iface);
|
||||
|
||||
if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
|
||||
if (device->use_timeline_semaphores)
|
||||
{
|
||||
WARN("Failed to create Vulkan fence, vr %d.\n", vr);
|
||||
goto fail_vkresult;
|
||||
if (FAILED(hr = d3d12_fence_update_gpu_signal_timeline_semaphore(fence, value)))
|
||||
return hr;
|
||||
|
||||
vk_semaphore = fence->timeline_semaphore;
|
||||
assert(vk_semaphore);
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((vr = d3d12_fence_create_vk_fence(fence, &vk_fence)) < 0)
|
||||
{
|
||||
WARN("Failed to create Vulkan fence, vr %d.\n", vr);
|
||||
goto fail_vkresult;
|
||||
}
|
||||
}
|
||||
|
||||
if (!(vk_queue = vkd3d_queue_acquire(vkd3d_queue)))
|
||||
@ -6175,7 +6553,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
|
||||
goto fail;
|
||||
}
|
||||
|
||||
if ((vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue, device, &vk_semaphore)) < 0)
|
||||
if (!device->use_timeline_semaphores && (vr = vkd3d_queue_create_vk_semaphore_locked(vkd3d_queue,
|
||||
device, &vk_semaphore)) < 0)
|
||||
{
|
||||
ERR("Failed to create Vulkan semaphore, vr %d.\n", vr);
|
||||
vk_semaphore = VK_NULL_HANDLE;
|
||||
@ -6191,7 +6570,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
|
||||
submit_info.signalSemaphoreCount = vk_semaphore ? 1 : 0;
|
||||
submit_info.pSignalSemaphores = &vk_semaphore;
|
||||
|
||||
if ((vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence))) >= 0)
|
||||
if (device->use_timeline_semaphores)
|
||||
{
|
||||
timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
|
||||
timeline_submit_info.pNext = NULL;
|
||||
timeline_submit_info.pSignalSemaphoreValues = &value;
|
||||
timeline_submit_info.signalSemaphoreValueCount = submit_info.signalSemaphoreCount;
|
||||
timeline_submit_info.waitSemaphoreValueCount = 0;
|
||||
timeline_submit_info.pWaitSemaphoreValues = NULL;
|
||||
submit_info.pNext = &timeline_submit_info;
|
||||
}
|
||||
|
||||
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, vk_fence));
|
||||
if (!device->use_timeline_semaphores && vr >= 0)
|
||||
{
|
||||
sequence_number = ++vkd3d_queue->submitted_sequence_number;
|
||||
|
||||
@ -6208,6 +6599,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue *
|
||||
goto fail_vkresult;
|
||||
}
|
||||
|
||||
if (device->use_timeline_semaphores)
|
||||
return vkd3d_enqueue_timeline_semaphore(&device->fence_worker, vk_semaphore, fence, value, vkd3d_queue);
|
||||
|
||||
if (vk_semaphore && SUCCEEDED(hr = d3d12_fence_add_vk_semaphore(fence, vk_semaphore, vk_fence, value)))
|
||||
vk_semaphore = VK_NULL_HANDLE;
|
||||
|
||||
@ -6243,32 +6637,27 @@ fail_vkresult:
|
||||
hr = hresult_from_vk_result(vr);
|
||||
fail:
|
||||
VK_CALL(vkDestroyFence(device->vk_device, vk_fence, NULL));
|
||||
VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
|
||||
if (!device->use_timeline_semaphores)
|
||||
VK_CALL(vkDestroySemaphore(device->vk_device, vk_semaphore, NULL));
|
||||
return hr;
|
||||
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
|
||||
ID3D12Fence *fence_iface, UINT64 value)
|
||||
static HRESULT d3d12_command_queue_wait_binary_semaphore(struct d3d12_command_queue *command_queue,
|
||||
struct d3d12_fence *fence, uint64_t value)
|
||||
{
|
||||
static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
|
||||
struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
struct vkd3d_signaled_semaphore *semaphore;
|
||||
uint64_t completed_value = 0;
|
||||
struct vkd3d_queue *queue;
|
||||
struct d3d12_fence *fence;
|
||||
VkSubmitInfo submit_info;
|
||||
VkQueue vk_queue;
|
||||
VkResult vr;
|
||||
HRESULT hr;
|
||||
|
||||
TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);
|
||||
|
||||
vk_procs = &command_queue->device->vk_procs;
|
||||
queue = command_queue->vkd3d_queue;
|
||||
|
||||
fence = unsafe_impl_from_ID3D12Fence(fence_iface);
|
||||
|
||||
semaphore = d3d12_fence_acquire_vk_semaphore(fence, value, &completed_value);
|
||||
if (!semaphore && completed_value >= value)
|
||||
{
|
||||
@ -6346,6 +6735,122 @@ fail:
|
||||
return hr;
|
||||
}
|
||||
|
||||
static inline void d3d12_fence_update_gpu_wait(struct d3d12_fence *fence, const struct vkd3d_queue *queue)
|
||||
{
|
||||
unsigned int i;
|
||||
bool found;
|
||||
int rc;
|
||||
|
||||
if ((rc = vkd3d_mutex_lock(&fence->mutex)))
|
||||
{
|
||||
ERR("Failed to lock mutex, error %d.\n", rc);
|
||||
return;
|
||||
}
|
||||
|
||||
for (i = 0, found = false; i < fence->gpu_wait_count; ++i)
|
||||
{
|
||||
if (fence->gpu_waits[i].queue == queue)
|
||||
{
|
||||
fence->gpu_waits[i].pending_value = queue->pending_wait_completion_value;
|
||||
found = true;
|
||||
}
|
||||
else if (d3d12_fence_gpu_wait_is_completed(fence, i) && i < --fence->gpu_wait_count)
|
||||
{
|
||||
fence->gpu_waits[i] = fence->gpu_waits[fence->gpu_wait_count];
|
||||
}
|
||||
}
|
||||
|
||||
if (!found)
|
||||
{
|
||||
if (fence->gpu_wait_count < ARRAY_SIZE(fence->gpu_waits))
|
||||
{
|
||||
fence->gpu_waits[fence->gpu_wait_count].queue = queue;
|
||||
fence->gpu_waits[fence->gpu_wait_count++].pending_value = queue->pending_wait_completion_value;
|
||||
}
|
||||
else
|
||||
{
|
||||
FIXME("Unable to track GPU fence wait.\n");
|
||||
}
|
||||
}
|
||||
|
||||
vkd3d_mutex_unlock(&fence->mutex);
|
||||
}
|
||||
|
||||
static HRESULT d3d12_command_queue_wait_timeline_semaphore(struct d3d12_command_queue *command_queue,
|
||||
struct d3d12_fence *fence, uint64_t value)
|
||||
{
|
||||
static const VkPipelineStageFlagBits wait_stage_mask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
|
||||
VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
|
||||
const struct vkd3d_vk_device_procs *vk_procs;
|
||||
struct vkd3d_queue *queue;
|
||||
VkSubmitInfo submit_info;
|
||||
VkQueue vk_queue;
|
||||
VkResult vr;
|
||||
|
||||
vk_procs = &command_queue->device->vk_procs;
|
||||
queue = command_queue->vkd3d_queue;
|
||||
|
||||
assert(fence->timeline_semaphore);
|
||||
timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
|
||||
timeline_submit_info.pNext = NULL;
|
||||
timeline_submit_info.signalSemaphoreValueCount = 0;
|
||||
timeline_submit_info.pSignalSemaphoreValues = NULL;
|
||||
timeline_submit_info.waitSemaphoreValueCount = 1;
|
||||
timeline_submit_info.pWaitSemaphoreValues = &value;
|
||||
|
||||
if (!(vk_queue = vkd3d_queue_acquire(queue)))
|
||||
{
|
||||
ERR("Failed to acquire queue %p.\n", queue);
|
||||
return E_FAIL;
|
||||
}
|
||||
|
||||
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submit_info.pNext = &timeline_submit_info;
|
||||
submit_info.waitSemaphoreCount = 1;
|
||||
submit_info.pWaitSemaphores = &fence->timeline_semaphore;
|
||||
submit_info.pWaitDstStageMask = &wait_stage_mask;
|
||||
submit_info.commandBufferCount = 0;
|
||||
submit_info.pCommandBuffers = NULL;
|
||||
submit_info.signalSemaphoreCount = 0;
|
||||
submit_info.pSignalSemaphores = NULL;
|
||||
|
||||
++queue->pending_wait_completion_value;
|
||||
|
||||
submit_info.signalSemaphoreCount = 1;
|
||||
submit_info.pSignalSemaphores = &queue->wait_completion_semaphore;
|
||||
timeline_submit_info.signalSemaphoreValueCount = 1;
|
||||
timeline_submit_info.pSignalSemaphoreValues = &queue->pending_wait_completion_value;
|
||||
|
||||
d3d12_fence_update_gpu_wait(fence, queue);
|
||||
|
||||
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
|
||||
|
||||
vkd3d_queue_release(queue);
|
||||
|
||||
if (vr < 0)
|
||||
{
|
||||
WARN("Failed to submit wait operation, vr %d.\n", vr);
|
||||
return hresult_from_vk_result(vr);
|
||||
}
|
||||
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
/* ID3D12CommandQueue::Wait implementation: queue a GPU-side wait for the fence
 * to reach "value". Devices using timeline semaphores take the timeline path;
 * otherwise a fixme is logged once and the binary-semaphore path is used. */
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *iface,
        ID3D12Fence *fence_iface, UINT64 value)
{
    struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface);
    struct d3d12_fence *fence = unsafe_impl_from_ID3D12Fence(fence_iface);

    TRACE("iface %p, fence %p, value %#"PRIx64".\n", iface, fence_iface, value);

    if (!command_queue->device->use_timeline_semaphores)
    {
        FIXME_ONCE("KHR_timeline_semaphore is not available or incompatible. Some wait commands may be unsupported.\n");
        return d3d12_command_queue_wait_binary_semaphore(command_queue, fence, value);
    }

    return d3d12_command_queue_wait_timeline_semaphore(command_queue, fence, value);
}
|
||||
|
||||
static HRESULT STDMETHODCALLTYPE d3d12_command_queue_GetTimestampFrequency(ID3D12CommandQueue *iface,
|
||||
UINT64 *frequency)
|
||||
{
|
||||
|
@ -129,6 +129,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] =
|
||||
VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3),
|
||||
VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor),
|
||||
VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge),
|
||||
VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore),
|
||||
/* EXT extensions */
|
||||
VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps),
|
||||
VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering),
|
||||
@ -791,6 +792,7 @@ struct vkd3d_physical_device_info
|
||||
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
|
||||
VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties;
|
||||
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties;
|
||||
VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
|
||||
|
||||
VkPhysicalDeviceProperties2KHR properties2;
|
||||
|
||||
@ -803,6 +805,7 @@ struct vkd3d_physical_device_info
|
||||
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features;
|
||||
VkPhysicalDeviceTransformFeedbackFeaturesEXT xfb_features;
|
||||
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT vertex_divisor_features;
|
||||
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR timeline_semaphore_features;
|
||||
|
||||
VkPhysicalDeviceFeatures2 features2;
|
||||
};
|
||||
@ -814,11 +817,13 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
|
||||
VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties;
|
||||
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties;
|
||||
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties;
|
||||
VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties;
|
||||
VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features;
|
||||
VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features;
|
||||
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features;
|
||||
VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features;
|
||||
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features;
|
||||
VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features;
|
||||
VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features;
|
||||
VkPhysicalDeviceMaintenance3Properties *maintenance3_properties;
|
||||
VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties;
|
||||
@ -838,6 +843,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
|
||||
buffer_alignment_properties = &info->texel_buffer_alignment_properties;
|
||||
vertex_divisor_features = &info->vertex_divisor_features;
|
||||
vertex_divisor_properties = &info->vertex_divisor_properties;
|
||||
timeline_semaphore_features = &info->timeline_semaphore_features;
|
||||
timeline_semaphore_properties = &info->timeline_semaphore_properties;
|
||||
xfb_features = &info->xfb_features;
|
||||
xfb_properties = &info->xfb_properties;
|
||||
|
||||
@ -859,6 +866,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
|
||||
vk_prepend_struct(&info->features2, xfb_features);
|
||||
vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT;
|
||||
vk_prepend_struct(&info->features2, vertex_divisor_features);
|
||||
timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR;
|
||||
vk_prepend_struct(&info->features2, timeline_semaphore_features);
|
||||
|
||||
if (vulkan_info->KHR_get_physical_device_properties2)
|
||||
VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2));
|
||||
@ -877,6 +886,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
|
||||
vk_prepend_struct(&info->properties2, xfb_properties);
|
||||
vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT;
|
||||
vk_prepend_struct(&info->properties2, vertex_divisor_properties);
|
||||
timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR;
|
||||
vk_prepend_struct(&info->properties2, timeline_semaphore_properties);
|
||||
|
||||
if (vulkan_info->KHR_get_physical_device_properties2)
|
||||
VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2));
|
||||
@ -1465,6 +1476,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
|
||||
vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect;
|
||||
vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries;
|
||||
vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);
|
||||
vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties;
|
||||
|
||||
device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64;
|
||||
device->feature_options.OutputMergerLogicOp = features->logicOp;
|
||||
@ -1589,6 +1601,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
|
||||
vulkan_info->EXT_shader_demote_to_helper_invocation = false;
|
||||
if (!physical_device_info->texel_buffer_alignment_features.texelBufferAlignment)
|
||||
vulkan_info->EXT_texel_buffer_alignment = false;
|
||||
if (!physical_device_info->timeline_semaphore_features.timelineSemaphore)
|
||||
vulkan_info->KHR_timeline_semaphore = false;
|
||||
|
||||
vulkan_info->texel_buffer_alignment_properties = physical_device_info->texel_buffer_alignment_properties;
|
||||
|
||||
@ -1939,6 +1953,75 @@ static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device)
|
||||
&& info.Alignment <= 0x10000;
|
||||
}
|
||||
|
||||
/* A lower value can be signalled on a D3D12 fence. Vulkan timeline semaphores
|
||||
* do not support this, but test if it works anyway. */
|
||||
static bool d3d12_is_timeline_semaphore_supported(const struct d3d12_device *device)
|
||||
{
|
||||
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
|
||||
VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
|
||||
VkSemaphore timeline_semaphore;
|
||||
VkSubmitInfo submit_info;
|
||||
bool result = false;
|
||||
uint64_t value = 0;
|
||||
VkQueue vk_queue;
|
||||
VkResult vr;
|
||||
|
||||
if (!device->vk_info.KHR_timeline_semaphore)
|
||||
return false;
|
||||
|
||||
if ((vr = vkd3d_create_timeline_semaphore(device, 1, &timeline_semaphore)) < 0)
|
||||
{
|
||||
WARN("Failed to create timeline semaphore, vr %d.\n", vr);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (!(vk_queue = vkd3d_queue_acquire(device->direct_queue)))
|
||||
{
|
||||
ERR("Failed to acquire queue %p.\n", device->direct_queue);
|
||||
VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
|
||||
return false;
|
||||
}
|
||||
|
||||
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
|
||||
submit_info.pNext = &timeline_submit_info;
|
||||
submit_info.waitSemaphoreCount = 0;
|
||||
submit_info.pWaitSemaphores = NULL;
|
||||
submit_info.pWaitDstStageMask = NULL;
|
||||
submit_info.commandBufferCount = 0;
|
||||
submit_info.pCommandBuffers = NULL;
|
||||
submit_info.signalSemaphoreCount = 1;
|
||||
submit_info.pSignalSemaphores = &timeline_semaphore;
|
||||
|
||||
timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
|
||||
timeline_submit_info.pNext = NULL;
|
||||
timeline_submit_info.pSignalSemaphoreValues = &value;
|
||||
timeline_submit_info.signalSemaphoreValueCount = 1;
|
||||
timeline_submit_info.waitSemaphoreValueCount = 0;
|
||||
timeline_submit_info.pWaitSemaphoreValues = NULL;
|
||||
|
||||
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
|
||||
|
||||
if (vr >= 0)
|
||||
{
|
||||
if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue))) < 0)
|
||||
WARN("Failed to wait for queue, vr %d.\n", vr);
|
||||
|
||||
if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, timeline_semaphore, &value))) < 0)
|
||||
ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
|
||||
else if (!(result = !value))
|
||||
WARN("Disabling timeline semaphore use due to incompatible behaviour.\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
WARN("Failed to submit signal operation, vr %d.\n", vr);
|
||||
}
|
||||
|
||||
vkd3d_queue_release(device->direct_queue);
|
||||
VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
|
||||
const struct vkd3d_device_create_info *create_info)
|
||||
{
|
||||
@ -2037,6 +2120,10 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
|
||||
}
|
||||
|
||||
device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device);
|
||||
device->use_timeline_semaphores = d3d12_is_timeline_semaphore_supported(device)
|
||||
&& vkd3d_queue_init_timeline_semaphore(device->direct_queue, device)
|
||||
&& vkd3d_queue_init_timeline_semaphore(device->compute_queue, device)
|
||||
&& vkd3d_queue_init_timeline_semaphore(device->copy_queue, device);
|
||||
|
||||
TRACE("Created Vulkan device %p.\n", vk_device);
|
||||
|
||||
|
@ -59,6 +59,7 @@
|
||||
#define VKD3D_MAX_SHADER_EXTENSIONS 3u
|
||||
#define VKD3D_MAX_SHADER_STAGES 5u
|
||||
#define VKD3D_MAX_VK_SYNC_OBJECTS 4u
|
||||
#define VKD3D_MAX_FENCE_WAITING_QUEUES 4u
|
||||
#define VKD3D_MAX_DESCRIPTOR_SETS 64u
|
||||
/* D3D12 binding tier 3 has a limit of 2048 samplers. */
|
||||
#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
|
||||
@ -125,6 +126,7 @@ struct vkd3d_vulkan_info
|
||||
bool KHR_maintenance3;
|
||||
bool KHR_push_descriptor;
|
||||
bool KHR_sampler_mirror_clamp_to_edge;
|
||||
bool KHR_timeline_semaphore;
|
||||
/* EXT device extensions */
|
||||
bool EXT_calibrated_timestamps;
|
||||
bool EXT_conditional_rendering;
|
||||
@ -150,6 +152,8 @@ struct vkd3d_vulkan_info
|
||||
|
||||
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
|
||||
|
||||
VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
|
||||
|
||||
unsigned int shader_extension_count;
|
||||
enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
|
||||
|
||||
@ -348,6 +352,7 @@ struct vkd3d_fence_worker
|
||||
struct vkd3d_enqueued_fence
|
||||
{
|
||||
VkFence vk_fence;
|
||||
VkSemaphore vk_semaphore;
|
||||
struct vkd3d_waiting_fence waiting_fence;
|
||||
} *enqueued_fences;
|
||||
size_t enqueued_fences_size;
|
||||
@ -357,6 +362,12 @@ struct vkd3d_fence_worker
|
||||
size_t vk_fences_size;
|
||||
struct vkd3d_waiting_fence *fences;
|
||||
size_t fences_size;
|
||||
VkSemaphore *vk_semaphores;
|
||||
size_t vk_semaphores_size;
|
||||
uint64_t *semaphore_wait_values;
|
||||
size_t semaphore_wait_values_size;
|
||||
|
||||
void (*wait_for_gpu_fences)(struct vkd3d_fence_worker *worker);
|
||||
|
||||
struct d3d12_device *device;
|
||||
};
|
||||
@ -511,6 +522,12 @@ struct vkd3d_signaled_semaphore
|
||||
bool is_acquired;
|
||||
};
|
||||
|
||||
/* One entry of the gpu_waits[] array in struct d3d12_fence, which holds up to
 * VKD3D_MAX_FENCE_WAITING_QUEUES of these, counted by gpu_wait_count.
 * NOTE(review): presumably each entry tracks a GPU-side wait on the fence
 * submitted to one queue - confirm against d3d12_fence_update_gpu_wait(). */
struct vkd3d_pending_fence_wait
|
||||
{
|
||||
const struct vkd3d_queue *queue; /* Queue this entry refers to; never written through this pointer. */
|
||||
uint64_t pending_value; /* NOTE(review): presumably the queue's pending_wait_completion_value for the wait - confirm. */
|
||||
};
|
||||
|
||||
/* ID3D12Fence */
|
||||
struct d3d12_fence
|
||||
{
|
||||
@ -530,6 +547,11 @@ struct d3d12_fence
|
||||
size_t events_size;
|
||||
size_t event_count;
|
||||
|
||||
VkSemaphore timeline_semaphore;
|
||||
uint64_t pending_timeline_value;
|
||||
struct vkd3d_pending_fence_wait gpu_waits[VKD3D_MAX_FENCE_WAITING_QUEUES];
|
||||
unsigned int gpu_wait_count;
|
||||
|
||||
struct list semaphores;
|
||||
unsigned int semaphore_count;
|
||||
|
||||
@ -545,6 +567,9 @@ struct d3d12_fence
|
||||
HRESULT d3d12_fence_create(struct d3d12_device *device, uint64_t initial_value,
|
||||
D3D12_FENCE_FLAGS flags, struct d3d12_fence **fence);
|
||||
|
||||
VkResult vkd3d_create_timeline_semaphore(const struct d3d12_device *device, uint64_t initial_value,
|
||||
VkSemaphore *timeline_semaphore);
|
||||
|
||||
/* ID3D12Heap */
|
||||
struct d3d12_heap
|
||||
{
|
||||
@ -1284,6 +1309,9 @@ struct vkd3d_queue
|
||||
VkQueueFlags vk_queue_flags;
|
||||
uint32_t timestamp_bits;
|
||||
|
||||
VkSemaphore wait_completion_semaphore;
|
||||
uint64_t pending_wait_completion_value;
|
||||
|
||||
struct
|
||||
{
|
||||
VkSemaphore vk_semaphore;
|
||||
@ -1298,6 +1326,7 @@ struct vkd3d_queue
|
||||
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue);
|
||||
HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index,
|
||||
const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue);
|
||||
bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device);
|
||||
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device);
|
||||
void vkd3d_queue_release(struct vkd3d_queue *queue);
|
||||
|
||||
@ -1456,6 +1485,7 @@ struct d3d12_device
|
||||
VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
|
||||
struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
|
||||
bool use_vk_heaps;
|
||||
bool use_timeline_semaphores;
|
||||
};
|
||||
|
||||
HRESULT d3d12_device_create(struct vkd3d_instance *instance,
|
||||
|
@ -195,6 +195,11 @@ VK_DEVICE_EXT_PFN(vkGetDescriptorSetLayoutSupportKHR)
|
||||
/* VK_KHR_push_descriptor */
|
||||
VK_DEVICE_EXT_PFN(vkCmdPushDescriptorSetKHR)
|
||||
|
||||
/* VK_KHR_timeline_semaphore */
|
||||
VK_DEVICE_EXT_PFN(vkGetSemaphoreCounterValueKHR)
|
||||
VK_DEVICE_EXT_PFN(vkWaitSemaphoresKHR)
|
||||
VK_DEVICE_EXT_PFN(vkSignalSemaphoreKHR)
|
||||
|
||||
/* VK_EXT_calibrated_timestamps */
|
||||
VK_DEVICE_EXT_PFN(vkGetCalibratedTimestampsEXT)
|
||||
|
||||
|
@ -33240,7 +33240,9 @@ static void test_queue_wait(void)
|
||||
command_list = context.list;
|
||||
queue = context.queue;
|
||||
|
||||
queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
/* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail.
|
||||
* Using a compute queue fixes this on most hardware, but it may still fail on low spec hardware. */
|
||||
queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
|
||||
|
||||
event = create_event();
|
||||
ok(event, "Failed to create event.\n");
|
||||
@ -33305,12 +33307,6 @@ static void test_queue_wait(void)
|
||||
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
|
||||
release_resource_readback(&rb);
|
||||
|
||||
if (!vkd3d_test_platform_is_windows())
|
||||
{
|
||||
skip("Wait() is not implemented yet.\n"); /* FIXME */
|
||||
goto skip_tests;
|
||||
}
|
||||
|
||||
/* Wait() before CPU signal */
|
||||
update_buffer_data(cb, 0, sizeof(blue), &blue);
|
||||
queue_wait(queue, fence, 2);
|
||||
@ -33386,7 +33382,6 @@ static void test_queue_wait(void)
|
||||
check_readback_data_uint(&rb, NULL, 0xff00ff00, 0);
|
||||
release_resource_readback(&rb);
|
||||
|
||||
skip_tests:
|
||||
/* Signal() and Wait() in the same command queue */
|
||||
update_buffer_data(cb, 0, sizeof(blue), &blue);
|
||||
queue_signal(queue, fence, 7);
|
||||
|
Loading…
x
Reference in New Issue
Block a user