vkd3d: Map timeline semaphore values to fence virtual values and buffer out-of-order waits.

D3D12 fence values (virtual values) must be mapped to strictly increasing
Vulkan timeline semaphore values to avoid invalid use of Vulkan timeline
semaphores. In particular, non-increasing values and value jumps of >= 4G
are permitted in d3d12, but are not guaranteed to be valid for a Vulkan
timeline semaphore.

Different virtual D3D12 command queues may map to the same Vulkan queue.
If a wait of value N is submitted on one command queue, and then a signal
for >= N is submitted on another, but they are sent to the same Vk queue,
the wait will never complete. The solution is to buffer out-of-order waits
and any subsequent queue commands until an unblocking signal value is
submitted to a different D3D12 queue, or signaled on the CPU.

Buffering out-of-order waits also fixes the old fence implementation so it
is fully functional, though a bit less efficient than timeline semaphores.

Based in part on vkd3d-proton patches by Hans-Kristian Arntzen. Unlike the
vkd3d-proton implementation, this patch does not use worker threads for
submissions to the Vulkan queue.

Signed-off-by: Conor McCarthy <cmccarthy@codeweavers.com>
Signed-off-by: Henri Verbeet <hverbeet@codeweavers.com>
Signed-off-by: Alexandre Julliard <julliard@winehq.org>
This commit is contained in:
Conor McCarthy 2022-05-13 01:11:06 +10:00 committed by Alexandre Julliard
parent a5c63dc4b9
commit 8cae046803
4 changed files with 607 additions and 460 deletions

File diff suppressed because it is too large Load Diff

View File

@ -747,7 +747,6 @@ struct vkd3d_physical_device_info
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties; VkPhysicalDeviceTransformFeedbackPropertiesEXT xfb_properties;
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT vertex_divisor_properties;
VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
VkPhysicalDeviceProperties2KHR properties2; VkPhysicalDeviceProperties2KHR properties2;
@ -772,7 +771,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties;
VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties;
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties;
VkPhysicalDeviceTimelineSemaphorePropertiesKHR *timeline_semaphore_properties;
VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features;
VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features;
VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features;
@ -799,7 +797,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vertex_divisor_features = &info->vertex_divisor_features; vertex_divisor_features = &info->vertex_divisor_features;
vertex_divisor_properties = &info->vertex_divisor_properties; vertex_divisor_properties = &info->vertex_divisor_properties;
timeline_semaphore_features = &info->timeline_semaphore_features; timeline_semaphore_features = &info->timeline_semaphore_features;
timeline_semaphore_properties = &info->timeline_semaphore_properties;
xfb_features = &info->xfb_features; xfb_features = &info->xfb_features;
xfb_properties = &info->xfb_properties; xfb_properties = &info->xfb_properties;
@ -841,8 +838,6 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i
vk_prepend_struct(&info->properties2, xfb_properties); vk_prepend_struct(&info->properties2, xfb_properties);
vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT;
vk_prepend_struct(&info->properties2, vertex_divisor_properties); vk_prepend_struct(&info->properties2, vertex_divisor_properties);
timeline_semaphore_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES_KHR;
vk_prepend_struct(&info->properties2, timeline_semaphore_properties);
if (vulkan_info->KHR_get_physical_device_properties2) if (vulkan_info->KHR_get_physical_device_properties2)
VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2));
@ -1431,7 +1426,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device,
vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect;
vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries;
vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1); vulkan_info->max_vertex_attrib_divisor = max(physical_device_info->vertex_divisor_properties.maxVertexAttribDivisor, 1);
vulkan_info->timeline_semaphore_properties = physical_device_info->timeline_semaphore_properties;
device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64; device->feature_options.DoublePrecisionFloatShaderOps = features->shaderFloat64;
device->feature_options.OutputMergerLogicOp = features->logicOp; device->feature_options.OutputMergerLogicOp = features->logicOp;
@ -1908,75 +1902,6 @@ static bool d3d12_is_64k_msaa_supported(struct d3d12_device *device)
&& info.Alignment <= 0x10000; && info.Alignment <= 0x10000;
} }
/* A lower value can be signalled on a D3D12 fence. Vulkan timeline semaphores
 * do not support this, but test if it works anyway.
 *
 * The test creates a timeline semaphore via vkd3d_create_timeline_semaphore()
 * (passing 1, presumably the initial counter value -- confirm against the
 * helper), submits a signal of value 0 on the direct queue, waits for the
 * queue to become idle, and reads the counter back.
 *
 * Returns true only if the counter reads 0 afterwards. Returns false if
 * KHR_timeline_semaphore is not set, or if creating the semaphore, acquiring
 * the queue, submitting the signal, or querying the counter fails. */
static bool d3d12_is_timeline_semaphore_supported(const struct d3d12_device *device)
{
const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs;
VkTimelineSemaphoreSubmitInfoKHR timeline_submit_info;
VkSemaphore timeline_semaphore;
VkSubmitInfo submit_info;
bool result = false;
/* Serves first as the value to signal (0), then as the destination of the
 * counter query below. */
uint64_t value = 0;
VkQueue vk_queue;
VkResult vr;
if (!device->vk_info.KHR_timeline_semaphore)
return false;
if ((vr = vkd3d_create_timeline_semaphore(device, 1, &timeline_semaphore)) < 0)
{
WARN("Failed to create timeline semaphore, vr %d.\n", vr);
return false;
}
if (!(vk_queue = vkd3d_queue_acquire(device->direct_queue)))
{
ERR("Failed to acquire queue %p.\n", device->direct_queue);
VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
return false;
}
/* Empty submission: no waits and no command buffers, only a single
 * timeline semaphore signal. */
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.pNext = &timeline_submit_info;
submit_info.waitSemaphoreCount = 0;
submit_info.pWaitSemaphores = NULL;
submit_info.pWaitDstStageMask = NULL;
submit_info.commandBufferCount = 0;
submit_info.pCommandBuffers = NULL;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &timeline_semaphore;
timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR;
timeline_submit_info.pNext = NULL;
timeline_submit_info.pSignalSemaphoreValues = &value;
timeline_submit_info.signalSemaphoreValueCount = 1;
timeline_submit_info.waitSemaphoreValueCount = 0;
timeline_submit_info.pWaitSemaphoreValues = NULL;
vr = VK_CALL(vkQueueSubmit(vk_queue, 1, &submit_info, VK_NULL_HANDLE));
if (vr >= 0)
{
/* A failed wait is only logged; the counter is still queried. */
if ((vr = VK_CALL(vkQueueWaitIdle(vk_queue))) < 0)
WARN("Failed to wait for queue, vr %d.\n", vr);
if ((vr = VK_CALL(vkGetSemaphoreCounterValueKHR(device->vk_device, timeline_semaphore, &value))) < 0)
ERR("Failed to get Vulkan semaphore status, vr %d.\n", vr);
/* The test passes only if the counter reads back as 0, i.e. the lower
 * value took effect. */
else if (!(result = !value))
WARN("Disabling timeline semaphore use due to incompatible behaviour.\n");
}
else
{
WARN("Failed to submit signal operation, vr %d.\n", vr);
}
/* Common cleanup for every path that acquired the queue. */
vkd3d_queue_release(device->direct_queue);
VK_CALL(vkDestroySemaphore(device->vk_device, timeline_semaphore, NULL));
return result;
}
static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
const struct vkd3d_device_create_info *create_info) const struct vkd3d_device_create_info *create_info)
{ {
@ -2075,10 +2000,6 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device,
} }
device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device); device->feature_options4.MSAA64KBAlignedTextureSupported = d3d12_is_64k_msaa_supported(device);
device->use_timeline_semaphores = d3d12_is_timeline_semaphore_supported(device)
&& vkd3d_queue_init_timeline_semaphore(device->direct_queue, device)
&& vkd3d_queue_init_timeline_semaphore(device->compute_queue, device)
&& vkd3d_queue_init_timeline_semaphore(device->copy_queue, device);
TRACE("Created Vulkan device %p.\n", vk_device); TRACE("Created Vulkan device %p.\n", vk_device);
@ -4362,6 +4283,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device,
vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator);
vkd3d_time_domains_init(device); vkd3d_time_domains_init(device);
device->blocked_queue_count = 0;
for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i) for (i = 0; i < ARRAY_SIZE(device->desc_mutex); ++i)
vkd3d_mutex_init(&device->desc_mutex[i]); vkd3d_mutex_init(&device->desc_mutex[i]);

View File

@ -59,7 +59,7 @@
#define VKD3D_MAX_SHADER_EXTENSIONS 3u #define VKD3D_MAX_SHADER_EXTENSIONS 3u
#define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_SHADER_STAGES 5u
#define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u
#define VKD3D_MAX_FENCE_WAITING_QUEUES 4u #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u
#define VKD3D_MAX_DESCRIPTOR_SETS 64u #define VKD3D_MAX_DESCRIPTOR_SETS 64u
/* D3D12 binding tier 3 has a limit of 2048 samplers. */ /* D3D12 binding tier 3 has a limit of 2048 samplers. */
#define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u #define VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS 2048u
@ -152,8 +152,6 @@ struct vkd3d_vulkan_info
VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties;
VkPhysicalDeviceTimelineSemaphorePropertiesKHR timeline_semaphore_properties;
unsigned int shader_extension_count; unsigned int shader_extension_count;
enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS]; enum vkd3d_shader_spirv_extension shader_extensions[VKD3D_MAX_SHADER_EXTENSIONS];
@ -502,15 +500,17 @@ HRESULT vkd3d_set_private_data_interface(struct vkd3d_private_store *store, cons
struct vkd3d_signaled_semaphore struct vkd3d_signaled_semaphore
{ {
uint64_t value; uint64_t value;
VkSemaphore vk_semaphore; union
VkFence vk_fence; {
bool is_acquired; struct
}; {
VkSemaphore vk_semaphore;
struct vkd3d_pending_fence_wait VkFence vk_fence;
{ bool is_acquired;
const struct vkd3d_queue *queue; } binary;
uint64_t pending_value; uint64_t timeline_value;
} u;
const struct vkd3d_queue *signalling_queue;
}; };
/* ID3D12Fence */ /* ID3D12Fence */
@ -521,6 +521,7 @@ struct d3d12_fence
LONG refcount; LONG refcount;
uint64_t value; uint64_t value;
uint64_t max_pending_value;
struct vkd3d_mutex mutex; struct vkd3d_mutex mutex;
struct vkd3d_cond null_event_cond; struct vkd3d_cond null_event_cond;
@ -534,9 +535,8 @@ struct d3d12_fence
size_t event_count; size_t event_count;
VkSemaphore timeline_semaphore; VkSemaphore timeline_semaphore;
uint64_t timeline_value;
uint64_t pending_timeline_value; uint64_t pending_timeline_value;
struct vkd3d_pending_fence_wait gpu_waits[VKD3D_MAX_FENCE_WAITING_QUEUES];
unsigned int gpu_wait_count;
struct vkd3d_signaled_semaphore *semaphores; struct vkd3d_signaled_semaphore *semaphores;
size_t semaphores_size; size_t semaphores_size;
@ -1294,9 +1294,6 @@ struct vkd3d_queue
VkQueueFlags vk_queue_flags; VkQueueFlags vk_queue_flags;
uint32_t timestamp_bits; uint32_t timestamp_bits;
VkSemaphore wait_completion_semaphore;
uint64_t pending_wait_completion_value;
struct struct
{ {
VkSemaphore vk_semaphore; VkSemaphore vk_semaphore;
@ -1311,10 +1308,45 @@ struct vkd3d_queue
VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue); VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue);
HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index, HRESULT vkd3d_queue_create(struct d3d12_device *device, uint32_t family_index,
const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue); const VkQueueFamilyProperties *properties, struct vkd3d_queue **queue);
bool vkd3d_queue_init_timeline_semaphore(struct vkd3d_queue *queue, struct d3d12_device *device);
void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device); void vkd3d_queue_destroy(struct vkd3d_queue *queue, struct d3d12_device *device);
void vkd3d_queue_release(struct vkd3d_queue *queue); void vkd3d_queue_release(struct vkd3d_queue *queue);
/* Kind of command queue operation stored in a struct vkd3d_cs_op_data.
 * NOTE(review): presumably these are the buffered queue commands (waits,
 * signals and command list executions) -- confirm against the users of
 * d3d12_command_queue.ops. */
enum vkd3d_cs_op
{
VKD3D_CS_OP_WAIT,
VKD3D_CS_OP_SIGNAL,
VKD3D_CS_OP_EXECUTE,
};
/* Payload of a wait operation: a fence and a 64-bit value.
 * NOTE(review): presumably the D3D12 (virtual) fence value to wait for, and
 * no ownership of the fence is visible here -- confirm against the code that
 * records and flushes these operations. */
struct vkd3d_cs_wait
{
struct d3d12_fence *fence;
uint64_t value;
};
/* Payload of a signal operation: a fence and a 64-bit value.
 * NOTE(review): presumably the D3D12 (virtual) fence value to signal --
 * confirm against the code that records and flushes these operations. */
struct vkd3d_cs_signal
{
struct d3d12_fence *fence;
uint64_t value;
};
/* Payload of an execute operation: a pointer to Vulkan command buffers and
 * a count.
 * NOTE(review): presumably 'buffers' points to 'buffer_count' elements; who
 * allocates and frees the array is not visible here -- confirm. */
struct vkd3d_cs_execute
{
VkCommandBuffer *buffers;
unsigned int buffer_count;
};
/* A single command queue operation: an opcode plus a union holding the
 * payload for a wait, signal or execute operation.
 * NOTE(review): 'opcode' presumably selects which member of 'u' is valid
 * (VKD3D_CS_OP_WAIT -> wait, and so on) -- confirm against the users. */
struct vkd3d_cs_op_data
{
enum vkd3d_cs_op opcode;
union
{
struct vkd3d_cs_wait wait;
struct vkd3d_cs_signal signal;
struct vkd3d_cs_execute execute;
} u;
};
/* ID3D12CommandQueue */ /* ID3D12CommandQueue */
struct d3d12_command_queue struct d3d12_command_queue
{ {
@ -1331,6 +1363,12 @@ struct d3d12_command_queue
struct d3d12_device *device; struct d3d12_device *device;
struct vkd3d_mutex op_mutex;
struct vkd3d_cs_op_data *ops;
size_t ops_count;
size_t ops_size;
bool is_flushing;
struct vkd3d_private_store private_store; struct vkd3d_private_store private_store;
}; };
@ -1452,6 +1490,9 @@ struct d3d12_device
unsigned int queue_family_count; unsigned int queue_family_count;
VkTimeDomainEXT vk_host_time_domain; VkTimeDomainEXT vk_host_time_domain;
struct d3d12_command_queue *blocked_queues[VKD3D_MAX_DEVICE_BLOCKED_QUEUES];
unsigned int blocked_queue_count;
struct vkd3d_instance *vkd3d_instance; struct vkd3d_instance *vkd3d_instance;
IUnknown *parent; IUnknown *parent;
@ -1470,7 +1511,6 @@ struct d3d12_device
VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT];
struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT];
bool use_vk_heaps; bool use_vk_heaps;
bool use_timeline_semaphores;
}; };
HRESULT d3d12_device_create(struct vkd3d_instance *instance, HRESULT d3d12_device_create(struct vkd3d_instance *instance,

View File

@ -33224,9 +33224,7 @@ static void test_queue_wait(void)
command_list = context.list; command_list = context.list;
queue = context.queue; queue = context.queue;
/* 'queue2' must not map to the same command queue as 'queue', or Wait() before GPU signal will fail. queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_DIRECT, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
* Using a compute queue fixes this on most hardware, but it may still fail on low spec hardware. */
queue2 = create_command_queue(device, D3D12_COMMAND_LIST_TYPE_COMPUTE, D3D12_COMMAND_QUEUE_PRIORITY_NORMAL);
event = create_event(); event = create_event();
ok(event, "Failed to create event.\n"); ok(event, "Failed to create event.\n");