diff --git a/README b/README index ed53f202..066a24bc 100644 --- a/README +++ b/README @@ -51,6 +51,9 @@ commas or semicolons. even when the output supports colour. * VKD3D_CONFIG - a list of options that change the behavior of libvkd3d. + * virtual_heaps - Create descriptors for each D3D12 root signature + descriptor range instead of entire descriptor heaps. Useful when push + constant or bound descriptor limits are exceeded. * vk_debug - enables Vulkan debug extensions. * VKD3D_DEBUG - controls the debug level for log messages produced by diff --git a/include/private/vkd3d_common.h b/include/private/vkd3d_common.h index ebf05216..11acc643 100644 --- a/include/private/vkd3d_common.h +++ b/include/private/vkd3d_common.h @@ -210,6 +210,10 @@ static inline LONG InterlockedIncrement(LONG volatile *x) { return __sync_add_and_fetch(x, 1); } +static inline LONG64 InterlockedIncrement64(LONG64 volatile *x) +{ + return __sync_add_and_fetch(x, 1); +} static inline LONG InterlockedAdd(LONG volatile *x, LONG val) { return __sync_add_and_fetch(x, val); diff --git a/include/vkd3d_windows.h b/include/vkd3d_windows.h index 8398d403..002ff667 100644 --- a/include/vkd3d_windows.h +++ b/include/vkd3d_windows.h @@ -91,6 +91,7 @@ typedef unsigned __int64 UINT64; typedef int64_t DECLSPEC_ALIGN(8) INT64; typedef uint64_t DECLSPEC_ALIGN(8) UINT64; # endif +typedef INT64 LONG64; typedef long LONG_PTR; typedef unsigned long ULONG_PTR; diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index f9b3389f..0424d204 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -1921,6 +1921,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l bindings->descriptor_set_count = 0; bindings->descriptor_table_dirty_mask = bindings->descriptor_table_active_mask & bindings->root_signature->descriptor_table_mask; bindings->push_descriptor_dirty_mask = bindings->push_descriptor_active_mask & bindings->root_signature->push_descriptor_mask; + bindings->cbv_srv_uav_heap_id = 0; + bindings->sampler_heap_id = 0; } static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, @@ -3021,6 +3023,146 @@ static void d3d12_command_list_update_descriptors(struct d3d12_command_list *lis d3d12_command_list_update_uav_counter_descriptors(list, bind_point); } +static unsigned int d3d12_command_list_bind_descriptor_table(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, unsigned int index, + struct d3d12_descriptor_heap **cbv_srv_uav_heap, struct d3d12_descriptor_heap **sampler_heap) +{ + struct d3d12_descriptor_heap *heap; + const struct d3d12_desc *desc; + unsigned int offset; + + if (!(desc = bindings->descriptor_tables[index])) + return 0; + + /* AMD, Nvidia and Intel drivers on Windows work if SetDescriptorHeaps() + * is not called, so we bind heaps from the tables instead. No NULL check is + * needed here because it's checked when descriptor tables are set. */ + heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&list->device->gpu_descriptor_allocator, desc); + offset = desc - (const struct d3d12_desc *)heap->descriptors; + + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + if (*cbv_srv_uav_heap) + { + if (heap == *cbv_srv_uav_heap) + return offset; + /* This occurs occasionally in Rise of the Tomb Raider apparently due to a race + * condition (one of several), but adding a mutex for table updates has no effect. */ + WARN("List %p uses descriptors from more than one CBV/SRV/UAV heap.\n", list); + } + *cbv_srv_uav_heap = heap; + } + else + { + if (*sampler_heap) + { + if (heap == *sampler_heap) + return offset; + WARN("List %p uses descriptors from more than one sampler heap.\n", list); + } + *sampler_heap = heap; + } + + return offset; +} + +static void d3d12_command_list_update_descriptor_tables(struct d3d12_command_list *list, + struct vkd3d_pipeline_bindings *bindings, struct d3d12_descriptor_heap **cbv_srv_uav_heap, + struct d3d12_descriptor_heap **sampler_heap) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + unsigned int offsets[D3D12_MAX_ROOT_COST]; + unsigned int i, j; + + for (i = 0, j = 0; i < ARRAY_SIZE(bindings->descriptor_tables); ++i) + { + if (!(rs->descriptor_table_mask & ((uint64_t)1 << i))) + continue; + offsets[j++] = d3d12_command_list_bind_descriptor_table(list, bindings, i, + cbv_srv_uav_heap, sampler_heap); + } + if (j) + { + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, rs->vk_pipeline_layout, VK_SHADER_STAGE_ALL, + rs->descriptor_table_offset, j * sizeof(uint32_t), offsets)); + } +} + +static void d3d12_command_list_bind_descriptor_heap(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point, struct d3d12_descriptor_heap *heap) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + enum vkd3d_vk_descriptor_set_index set; + + if (!heap) + return; + + if (heap->desc.Type == D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV) + { + if (heap->serial_id == bindings->cbv_srv_uav_heap_id) + return; + bindings->cbv_srv_uav_heap_id = heap->serial_id; + } + else + { + if (heap->serial_id == bindings->sampler_heap_id) + return; + bindings->sampler_heap_id = heap->serial_id; + } + + /* These sets can be shared across multiple command lists, and therefore binding must + * be synchronised. On an experimental branch in which caching of Vk descriptor writes + * greatly increased the chance of multiple threads arriving here at the same time, + * GRID 2019 crashed without the mutex lock. */ + vkd3d_mutex_lock(&heap->vk_sets_mutex); + + for (set = 0; set < ARRAY_SIZE(heap->vk_descriptor_sets); ++set) + { + VkDescriptorSet vk_descriptor_set = heap->vk_descriptor_sets[set].vk_set; + + if (!vk_descriptor_set) + continue; + + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->vk_set_count + set, 1, &vk_descriptor_set, 0, NULL)); + } + + vkd3d_mutex_unlock(&heap->vk_sets_mutex); +} + +static void d3d12_command_list_update_heap_descriptors(struct d3d12_command_list *list, + enum vkd3d_pipeline_bind_point bind_point) +{ + struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + struct d3d12_descriptor_heap *cbv_srv_uav_heap = NULL, *sampler_heap = NULL; + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + const struct d3d12_root_signature *rs = bindings->root_signature; + + if (!rs) + return; + + if (bindings->descriptor_table_dirty_mask || bindings->push_descriptor_dirty_mask) + d3d12_command_list_prepare_descriptors(list, bind_point); + if (bindings->descriptor_table_dirty_mask) + d3d12_command_list_update_descriptor_tables(list, bindings, &cbv_srv_uav_heap, &sampler_heap); + bindings->descriptor_table_dirty_mask = 0; + + d3d12_command_list_update_push_descriptors(list, bind_point); + + if (bindings->descriptor_set_count) + { + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, bindings->vk_bind_point, rs->vk_pipeline_layout, + rs->main_set, bindings->descriptor_set_count, bindings->descriptor_sets, 0, NULL)); + bindings->in_use = true; + } + + d3d12_command_list_bind_descriptor_heap(list, bind_point, cbv_srv_uav_heap); + d3d12_command_list_bind_descriptor_heap(list, bind_point, sampler_heap); +} + static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *list) { d3d12_command_list_end_current_render_pass(list); @@ -3028,7 +3170,7 @@ static bool d3d12_command_list_update_compute_state(struct d3d12_command_list *l if (!d3d12_command_list_update_compute_pipeline(list)) return false; - d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); + list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_COMPUTE); return true; } @@ -3045,7 +3187,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list if (!d3d12_command_list_update_current_framebuffer(list)) return false; - d3d12_command_list_update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); + list->update_descriptors(list, VKD3D_PIPELINE_BIND_POINT_GRAPHICS); if (list->current_render_pass != VK_NULL_HANDLE) return true; @@ -4113,6 +4255,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12Graphi TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); /* Our current implementation does not need this method. + * In Windows it doesn't need to be called at all for correct operation, and + * at least on AMD the wrong heaps can be set here and tests still succeed. * * It could be used to validate descriptor tables but we do not have an * equivalent of the D3D12 Debug Layer. */ @@ -5706,6 +5850,9 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d list->allocator = allocator; + list->update_descriptors = device->use_vk_heaps ? d3d12_command_list_update_heap_descriptors + : d3d12_command_list_update_descriptors; + if (SUCCEEDED(hr = d3d12_command_allocator_allocate_command_buffer(allocator, list))) { list->pipeline_bindings[VKD3D_PIPELINE_BIND_POINT_GRAPHICS].vk_uav_counter_views = NULL; diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 1ccd242a..e28faaa9 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -142,6 +142,112 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_VERTEX_ATTRIBUTE_DIVISOR, EXT_vertex_attribute_divisor), }; +static HRESULT vkd3d_create_vk_descriptor_heap_layout(struct d3d12_device *device, unsigned int index) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetLayoutBindingFlagsCreateInfoEXT flags_info; + VkDescriptorSetLayoutCreateInfo set_desc; + VkDescriptorBindingFlagsEXT set_flags; + VkDescriptorSetLayoutBinding binding; + VkResult vr; + + binding.binding = 0; + binding.descriptorType = device->vk_descriptor_heap_layouts[index].type; + binding.descriptorCount = device->vk_descriptor_heap_layouts[index].count; + binding.stageFlags = VK_SHADER_STAGE_ALL; + binding.pImmutableSamplers = NULL; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + set_desc.pNext = &flags_info; + set_desc.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_UPDATE_AFTER_BIND_POOL_BIT_EXT; + set_desc.bindingCount = 1; + set_desc.pBindings = &binding; + + set_flags = VK_DESCRIPTOR_BINDING_VARIABLE_DESCRIPTOR_COUNT_BIT_EXT + | VK_DESCRIPTOR_BINDING_PARTIALLY_BOUND_BIT_EXT | VK_DESCRIPTOR_BINDING_UPDATE_AFTER_BIND_BIT_EXT + | VK_DESCRIPTOR_BINDING_UPDATE_UNUSED_WHILE_PENDING_BIT_EXT; + + flags_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_BINDING_FLAGS_CREATE_INFO_EXT; + flags_info.pNext = NULL; + flags_info.bindingCount = 1; + flags_info.pBindingFlags = &set_flags; + + if ((vr = VK_CALL(vkCreateDescriptorSetLayout(device->vk_device, &set_desc, NULL, + &device->vk_descriptor_heap_layouts[index].vk_set_layout))) < 0) + { + WARN("Failed to create Vulkan descriptor set layout, vr %d.\n", vr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + +static void vkd3d_vk_descriptor_heap_layouts_cleanup(struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + enum vkd3d_vk_descriptor_set_index set; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, device->vk_descriptor_heap_layouts[set].vk_set_layout, + NULL)); +} + +static HRESULT vkd3d_vk_descriptor_heap_layouts_init(struct d3d12_device *device) +{ + static const struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT] = + { + {VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, false, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + {VK_DESCRIPTOR_TYPE_SAMPLER, false, D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER}, + /* UAV counters */ + {VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER, true, D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV}, + }; + const struct vkd3d_device_descriptor_limits *limits = &device->vk_info.descriptor_limits; + enum vkd3d_vk_descriptor_set_index set; + HRESULT hr; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + device->vk_descriptor_heap_layouts[set] = vk_descriptor_heap_layouts[set]; + + if (!device->use_vk_heaps) + return S_OK; + + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + { + switch (device->vk_descriptor_heap_layouts[set].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + device->vk_descriptor_heap_layouts[set].count = limits->uniform_buffer_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + device->vk_descriptor_heap_layouts[set].count = limits->sampled_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + device->vk_descriptor_heap_layouts[set].count = limits->storage_image_max_descriptors; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + device->vk_descriptor_heap_layouts[set].count = limits->sampler_max_descriptors; + break; + default: + ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); + break; + } + + if (FAILED(hr = vkd3d_create_vk_descriptor_heap_layout(device, set))) + { + vkd3d_vk_descriptor_heap_layouts_cleanup(device); + return hr; + } + } + + return S_OK; +} + static unsigned int get_spec_version(const VkExtensionProperties *extensions, unsigned int count, const char *extension_name) { @@ -431,6 +537,7 @@ static void vkd3d_init_debug_report(struct vkd3d_instance *instance) static const struct vkd3d_debug_option vkd3d_config_options[] = { + {"virtual_heaps", VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS}, /* always use virtual descriptor heaps */ {"vk_debug", VKD3D_CONFIG_FLAG_VULKAN_DEBUG}, /* enable Vulkan debug extensions */ }; @@ -1287,6 +1394,36 @@ static void vkd3d_device_descriptor_limits_init(struct vkd3d_device_descriptor_l limits->sampler_max_descriptors = min(device_limits->maxDescriptorSetSamplers, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); } +static void vkd3d_device_vk_heaps_descriptor_limits_init(struct vkd3d_device_descriptor_limits *limits, + const VkPhysicalDeviceDescriptorIndexingPropertiesEXT *properties) +{ + const unsigned int root_provision = D3D12_MAX_ROOT_COST / 2; + unsigned int srv_divisor = 1, uav_divisor = 1; + + /* The total number of populated sampled image or storage image descriptors never exceeds the size of + * one set (or two sets if every UAV has a counter), but the total size of bound layouts will exceed + * device limits if each set size is maxDescriptorSet*, because of the D3D12 buffer + image allowance + * (and UAV counters). Breaking limits for layouts seems to work with RADV and Nvidia drivers at + * least, but let's try to stay within them if limits are high enough. */ + if (properties->maxDescriptorSetUpdateAfterBindSampledImages >= (1u << 21)) + { + srv_divisor = 2; + uav_divisor = properties->maxDescriptorSetUpdateAfterBindSampledImages >= (3u << 20) ? 3 : 2; + } + + limits->uniform_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindUniformBuffers, + properties->maxPerStageDescriptorUpdateAfterBindUniformBuffers - root_provision); + limits->sampled_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSampledImages, + properties->maxPerStageDescriptorUpdateAfterBindSampledImages / srv_divisor - root_provision); + limits->storage_buffer_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageBuffers, + properties->maxPerStageDescriptorUpdateAfterBindStorageBuffers - root_provision); + limits->storage_image_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindStorageImages, + properties->maxPerStageDescriptorUpdateAfterBindStorageImages / uav_divisor - root_provision); + limits->sampler_max_descriptors = min(properties->maxDescriptorSetUpdateAfterBindSamplers, + properties->maxPerStageDescriptorUpdateAfterBindSamplers - root_provision); + limits->sampler_max_descriptors = min(limits->sampler_max_descriptors, VKD3D_MAX_DESCRIPTOR_SET_SAMPLERS); +} + static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, const struct vkd3d_device_create_info *create_info, struct vkd3d_physical_device_info *physical_device_info, @@ -1514,8 +1651,20 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, features->robustBufferAccess = VK_FALSE; } - vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, - &physical_device_info->properties2.properties.limits); + /* Select descriptor heap implementation. Forcing virtual heaps may be useful if + * a client allocates descriptor heaps too large for the Vulkan device, or the + * root signature cost exceeds the available push constant size. Virtual heaps + * use only enough descriptors for the descriptor tables of the currently bound + * root signature, and don't require a 32-bit push constant for each table. */ + device->use_vk_heaps = vulkan_info->EXT_descriptor_indexing + && !(device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS); + + if (device->use_vk_heaps) + vkd3d_device_vk_heaps_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->descriptor_indexing_properties); + else + vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, + &physical_device_info->properties2.properties.limits); return S_OK; } @@ -2504,6 +2653,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_private_store_destroy(&device->private_store); vkd3d_cleanup_format_info(device); + vkd3d_vk_descriptor_heap_layouts_cleanup(device); vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); vkd3d_destroy_null_resources(&device->null_resources, device); vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator); @@ -4005,6 +4155,9 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_uav_clear_state_init(&device->uav_clear_state, device))) goto out_destroy_null_resources; + if (FAILED(hr = vkd3d_vk_descriptor_heap_layouts_init(device))) + goto out_cleanup_uav_clear_state; + vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_descriptor_allocator_init(&device->gpu_descriptor_allocator); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); @@ -4020,6 +4173,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, return S_OK; +out_cleanup_uav_clear_state: + vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); out_destroy_null_resources: vkd3d_destroy_null_resources(&device->null_resources, device); out_cleanup_format_info: diff --git a/libs/vkd3d/resource.c b/libs/vkd3d/resource.c index 4c48e22e..e5827955 100644 --- a/libs/vkd3d/resource.c +++ b/libs/vkd3d/resource.c @@ -2128,6 +2128,58 @@ void vkd3d_view_decref(struct vkd3d_view *view, struct d3d12_device *device) vkd3d_view_destroy(view, device); } +/* dst and src contain the same data unless another thread overwrites dst. The array index is + * calculated from dst, and src is thread safe. */ +static void d3d12_desc_write_vk_heap(const struct d3d12_desc *dst, const struct d3d12_desc *src, + struct d3d12_device *device) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set; + struct d3d12_descriptor_heap *descriptor_heap; + const struct vkd3d_vk_device_procs *vk_procs; + + descriptor_heap = vkd3d_gpu_descriptor_allocator_heap_from_descriptor(&device->gpu_descriptor_allocator, dst); + descriptor_set = &descriptor_heap->vk_descriptor_sets[vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + src->vk_descriptor_type)]; + vk_procs = &device->vk_procs; + + vkd3d_mutex_lock(&descriptor_heap->vk_sets_mutex); + + descriptor_set->vk_descriptor_write.dstArrayElement = dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + switch (src->vk_descriptor_type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + descriptor_set->vk_descriptor_write.pBufferInfo = &src->u.vk_cbv_info; + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + descriptor_set->vk_image_info.imageView = src->u.view->u.vk_image_view; + break; + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->u.vk_buffer_view; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_set->vk_image_info.sampler = src->u.view->u.vk_sampler; + break; + default: + ERR("Unhandled descriptor type %#x.\n", src->vk_descriptor_type); + break; + } + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + + if (src->magic == VKD3D_DESCRIPTOR_MAGIC_UAV && src->u.view->vk_counter_view) + { + descriptor_set = &descriptor_heap->vk_descriptor_sets[VKD3D_SET_INDEX_UAV_COUNTER]; + descriptor_set->vk_descriptor_write.dstArrayElement = dst + - (const struct d3d12_desc *)descriptor_heap->descriptors; + descriptor_set->vk_descriptor_write.pTexelBufferView = &src->u.view->vk_counter_view; + VK_CALL(vkUpdateDescriptorSets(device->vk_device, 1, &descriptor_set->vk_descriptor_write, 0, NULL)); + } + + vkd3d_mutex_unlock(&descriptor_heap->vk_sets_mutex); +} + void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *src, struct d3d12_device *device) { @@ -2149,6 +2201,9 @@ void d3d12_desc_write_atomic(struct d3d12_desc *dst, const struct d3d12_desc *sr /* Destroy the view after unlocking to reduce wait time. */ if (defunct_view) vkd3d_view_destroy(defunct_view, device); + + if (device->use_vk_heaps && dst->magic) + d3d12_desc_write_vk_heap(dst, src, device); } static void d3d12_desc_destroy(struct d3d12_desc *descriptor, struct d3d12_device *device) @@ -3425,9 +3480,12 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea if (!refcount) { + const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_device *device = heap->device; unsigned int i; + vk_procs = &device->vk_procs; + vkd3d_private_store_destroy(&heap->private_store); switch (heap->desc.Type) @@ -3474,6 +3532,9 @@ static ULONG STDMETHODCALLTYPE d3d12_descriptor_heap_Release(ID3D12DescriptorHea break; } + VK_CALL(vkDestroyDescriptorPool(device->vk_device, heap->vk_descriptor_pool, NULL)); + vkd3d_mutex_destroy(&heap->vk_sets_mutex); + vkd3d_free(heap); d3d12_device_release(device); @@ -3584,19 +3645,155 @@ static const struct ID3D12DescriptorHeapVtbl d3d12_descriptor_heap_vtbl = d3d12_descriptor_heap_GetGPUDescriptorHandleForHeapStart, }; +const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[] = +{ + VKD3D_SET_INDEX_SAMPLER, + VKD3D_SET_INDEX_COUNT, + VKD3D_SET_INDEX_SAMPLED_IMAGE, + VKD3D_SET_INDEX_STORAGE_IMAGE, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER, + VKD3D_SET_INDEX_UNIFORM_BUFFER, +}; + +static HRESULT d3d12_descriptor_heap_create_descriptor_pool(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorPoolSize pool_sizes[VKD3D_SET_INDEX_COUNT]; + struct VkDescriptorPoolCreateInfo pool_desc; + VkDevice vk_device = device->vk_device; + enum vkd3d_vk_descriptor_set_index set; + VkResult vr; + + for (set = 0, pool_desc.poolSizeCount = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + { + if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type) + { + pool_sizes[pool_desc.poolSizeCount].type = device->vk_descriptor_heap_layouts[set].type; + pool_sizes[pool_desc.poolSizeCount++].descriptorCount = desc->NumDescriptors; + } + } + + pool_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + pool_desc.pNext = NULL; + pool_desc.flags = VK_DESCRIPTOR_POOL_CREATE_UPDATE_AFTER_BIND_BIT_EXT; + pool_desc.maxSets = pool_desc.poolSizeCount; + pool_desc.pPoolSizes = pool_sizes; + if ((vr = VK_CALL(vkCreateDescriptorPool(vk_device, &pool_desc, NULL, &descriptor_heap->vk_descriptor_pool))) < 0) + ERR("Failed to create descriptor pool, vr %d.\n", vr); + + return hresult_from_vk_result(vr); +} + +static HRESULT d3d12_descriptor_heap_create_descriptor_set(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, unsigned int set) +{ + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + uint32_t variable_binding_size = descriptor_heap->desc.NumDescriptors; + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkDescriptorSetVariableDescriptorCountAllocateInfoEXT set_size; + VkDescriptorSetAllocateInfo set_desc; + VkResult vr; + + set_desc.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + set_desc.pNext = &set_size; + set_desc.descriptorPool = descriptor_heap->vk_descriptor_pool; + set_desc.descriptorSetCount = 1; + set_desc.pSetLayouts = &device->vk_descriptor_heap_layouts[set].vk_set_layout; + set_size.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_VARIABLE_DESCRIPTOR_COUNT_ALLOCATE_INFO_EXT; + set_size.pNext = NULL; + set_size.descriptorSetCount = 1; + set_size.pDescriptorCounts = &variable_binding_size; + if ((vr = VK_CALL(vkAllocateDescriptorSets(device->vk_device, &set_desc, &descriptor_set->vk_set))) >= 0) + { + descriptor_set->vk_descriptor_write.dstSet = descriptor_set->vk_set; + return S_OK; + } + + ERR("Failed to allocate descriptor set, vr %d.\n", vr); + return hresult_from_vk_result(vr); +} + +static HRESULT d3d12_descriptor_heap_vk_descriptor_sets_init(struct d3d12_descriptor_heap *descriptor_heap, + struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) +{ + enum vkd3d_vk_descriptor_set_index set; + HRESULT hr; + + descriptor_heap->vk_descriptor_pool = VK_NULL_HANDLE; + memset(descriptor_heap->vk_descriptor_sets, 0, sizeof(descriptor_heap->vk_descriptor_sets)); + vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); + + if (!device->use_vk_heaps || (desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_CBV_SRV_UAV + && desc->Type != D3D12_DESCRIPTOR_HEAP_TYPE_SAMPLER)) + return S_OK; + + if (FAILED(hr = d3d12_descriptor_heap_create_descriptor_pool(descriptor_heap, device, desc))) + return hr; + + for (set = 0; set < ARRAY_SIZE(descriptor_heap->vk_descriptor_sets); ++set) + { + struct d3d12_descriptor_heap_vk_set *descriptor_set = &descriptor_heap->vk_descriptor_sets[set]; + + descriptor_set->vk_descriptor_write.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + descriptor_set->vk_descriptor_write.pNext = NULL; + descriptor_set->vk_descriptor_write.dstBinding = 0; + descriptor_set->vk_descriptor_write.descriptorCount = 1; + descriptor_set->vk_descriptor_write.descriptorType = device->vk_descriptor_heap_layouts[set].type; + descriptor_set->vk_descriptor_write.pImageInfo = NULL; + descriptor_set->vk_descriptor_write.pBufferInfo = NULL; + descriptor_set->vk_descriptor_write.pTexelBufferView = NULL; + switch (device->vk_descriptor_heap_layouts[set].type) + { + case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: + case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: + case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: + break; + case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + break; + case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.sampler = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + break; + case VK_DESCRIPTOR_TYPE_SAMPLER: + descriptor_set->vk_descriptor_write.pImageInfo = &descriptor_set->vk_image_info; + descriptor_set->vk_image_info.imageView = VK_NULL_HANDLE; + descriptor_set->vk_image_info.imageLayout = VK_IMAGE_LAYOUT_UNDEFINED; + break; + default: + ERR("Unhandled descriptor type %#x.\n", device->vk_descriptor_heap_layouts[set].type); + return E_FAIL; + } + if (device->vk_descriptor_heap_layouts[set].applicable_heap_type == desc->Type + && FAILED(hr = d3d12_descriptor_heap_create_descriptor_set(descriptor_heap, device, set))) + return hr; + } + + return S_OK; +} + static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descriptor_heap, struct d3d12_device *device, const D3D12_DESCRIPTOR_HEAP_DESC *desc) { + static LONG64 serial_id; HRESULT hr; descriptor_heap->ID3D12DescriptorHeap_iface.lpVtbl = &d3d12_descriptor_heap_vtbl; descriptor_heap->refcount = 1; + descriptor_heap->serial_id = InterlockedIncrement64(&serial_id); descriptor_heap->desc = *desc; if (FAILED(hr = vkd3d_private_store_init(&descriptor_heap->private_store))) return hr; + d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + d3d12_device_add_ref(descriptor_heap->device = device); return S_OK; diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index 72422897..97e2856d 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -92,6 +92,8 @@ static void d3d12_root_signature_cleanup(struct d3d12_root_signature *root_signa if (root_signature->descriptor_mapping) vkd3d_free(root_signature->descriptor_mapping); vkd3d_free(root_signature->descriptor_offsets); + vkd3d_free(root_signature->uav_counter_mapping); + vkd3d_free(root_signature->uav_counter_offsets); if (root_signature->root_constants) vkd3d_free(root_signature->root_constants); @@ -327,6 +329,7 @@ static bool vk_binding_from_d3d12_descriptor_range(struct VkDescriptorSetLayoutB struct d3d12_root_signature_info { size_t binding_count; + size_t uav_range_count; size_t root_constant_count; size_t root_descriptor_count; @@ -401,6 +404,7 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_sig info->binding_count += binding_count; info->uav_count += count * 2u; uav_unbounded_range |= unbounded; + ++info->uav_range_count; break; case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: info->cbv_count += count; @@ -495,6 +499,7 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat uint32_t *push_constant_range_count) { uint32_t push_constants_offset[D3D12_SHADER_VISIBILITY_PIXEL + 1]; + bool use_vk_heaps = root_signature->device->use_vk_heaps; unsigned int i, j, push_constant_count; uint32_t offset; @@ -507,7 +512,8 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat continue; assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); - push_constants[p->ShaderVisibility].stageFlags = stage_flags_from_visibility(p->ShaderVisibility); + push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL + : stage_flags_from_visibility(p->ShaderVisibility); push_constants[p->ShaderVisibility].size += p->u.Constants.Num32BitValues * sizeof(uint32_t); } if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) @@ -586,6 +592,8 @@ struct vkd3d_descriptor_set_context unsigned int table_index; unsigned int unbounded_offset; unsigned int descriptor_index; + unsigned int uav_counter_index; + unsigned int push_constant_index; uint32_t descriptor_binding; }; @@ -595,6 +603,7 @@ static bool vkd3d_validate_descriptor_set_count(struct d3d12_device *device, uns if (set_count > max_count) { + /* NOTE: If maxBoundDescriptorSets is < 9, try VKD3D_CONFIG=virtual_heaps */ ERR("Required descriptor set count exceeds maximum allowed count of %u.\n", max_count); return false; } @@ -802,6 +811,122 @@ static void d3d12_root_signature_map_vk_unbounded_binding(struct d3d12_root_sign offset->dynamic_offset_index = ~0u; } +static unsigned int vk_heap_binding_count_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + unsigned int descriptor_set_size) +{ + unsigned int max_count; + + if (descriptor_set_size <= range->offset) + { + ERR("Descriptor range offset %u exceeds maximum available offset %u.\n", range->offset, descriptor_set_size - 1); + max_count = 0; + } + else + { + max_count = descriptor_set_size - range->offset; + } + + if (range->descriptor_count != UINT_MAX) + { + if (range->descriptor_count > max_count) + ERR("Range size %u exceeds available descriptor count %u.\n", range->descriptor_count, max_count); + return range->descriptor_count; + } + else + { + /* Prefer an unsupported binding count vs a zero count, because shader compilation will fail + * to match a declaration to a zero binding, resulting in failure of pipline state creation. */ + return max_count + !max_count; + } +} + +static void vkd3d_descriptor_heap_binding_from_descriptor_range(const struct d3d12_root_descriptor_table_range *range, + bool is_buffer, const struct d3d12_root_signature *root_signature, + struct vkd3d_shader_descriptor_binding *binding) +{ + const struct vkd3d_device_descriptor_limits *descriptor_limits = &root_signature->device->vk_info.descriptor_limits; + unsigned int descriptor_set_size; + + switch (range->type) + { + case VKD3D_SHADER_DESCRIPTOR_TYPE_SRV: + binding->set = is_buffer ? VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER : VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_UAV: + binding->set = is_buffer ? VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER : VKD3D_SET_INDEX_STORAGE_IMAGE; + descriptor_set_size = descriptor_limits->storage_image_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_CBV: + binding->set = VKD3D_SET_INDEX_UNIFORM_BUFFER; + descriptor_set_size = descriptor_limits->uniform_buffer_max_descriptors; + break; + case VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER: + binding->set = VKD3D_SET_INDEX_SAMPLER; + descriptor_set_size = descriptor_limits->sampler_max_descriptors; + break; + default: + FIXME("Unhandled descriptor range type type %#x.\n", range->type); + binding->set = VKD3D_SET_INDEX_SAMPLED_IMAGE; + descriptor_set_size = descriptor_limits->sampled_image_max_descriptors; + break; + } + binding->set += root_signature->vk_set_count; + binding->binding = 0; + binding->count = vk_heap_binding_count_from_descriptor_range(range, descriptor_set_size); +} + +static void d3d12_root_signature_map_vk_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, bool buffer_descriptor, + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_resource_binding *mapping = &root_signature->descriptor_mapping[context->descriptor_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->descriptor_offsets[context->descriptor_index++]; + + mapping->type = range->type; + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->flags = buffer_descriptor ? VKD3D_SHADER_BINDING_FLAG_BUFFER : VKD3D_SHADER_BINDING_FLAG_IMAGE; + vkd3d_descriptor_heap_binding_from_descriptor_range(range, buffer_descriptor, root_signature, &mapping->binding); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +static void d3d12_root_signature_map_vk_heap_uav_counter(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + struct vkd3d_shader_uav_counter_binding *mapping = &root_signature->uav_counter_mapping[context->uav_counter_index]; + struct vkd3d_shader_descriptor_offset *offset = &root_signature->uav_counter_offsets[context->uav_counter_index++]; + + mapping->register_space = range->register_space; + mapping->register_index = range->base_register_idx; + mapping->shader_visibility = shader_visibility; + mapping->binding.set = root_signature->vk_set_count + VKD3D_SET_INDEX_UAV_COUNTER; + mapping->binding.binding = 0; + mapping->binding.count = vk_heap_binding_count_from_descriptor_range(range, + root_signature->device->vk_info.descriptor_limits.storage_image_max_descriptors); + offset->static_offset = range->offset; + offset->dynamic_offset_index = context->push_constant_index; +} + +static void d3d12_root_signature_map_descriptor_heap_binding(struct d3d12_root_signature *root_signature, + const struct d3d12_root_descriptor_table_range *range, enum vkd3d_shader_visibility shader_visibility, + struct vkd3d_descriptor_set_context *context) +{ + bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; + + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + { + d3d12_root_signature_map_vk_heap_binding(root_signature, range, true, shader_visibility, context); + if (range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) + d3d12_root_signature_map_vk_heap_uav_counter(root_signature, range, shader_visibility, context); + } + + d3d12_root_signature_map_vk_heap_binding(root_signature, range, is_buffer, shader_visibility, context); +} + static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_root_signature *root_signature, const struct d3d12_root_descriptor_table_range *range, unsigned int descriptor_offset, enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) @@ -868,6 +993,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo struct vkd3d_descriptor_set_context *context) { const struct d3d12_device *device = root_signature->device; + bool use_vk_heaps = root_signature->device->use_vk_heaps; struct d3d12_root_descriptor_table *table; unsigned int i, j, k, range_count; uint32_t vk_binding; @@ -935,6 +1061,16 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo range = &table->ranges[j]; + if (use_vk_heaps) + { + /* set, binding and vk_binding_count are not used. */ + range->set = 0; + range->binding = 0; + range->vk_binding_count = 0; + d3d12_root_signature_map_descriptor_heap_binding(root_signature, range, shader_visibility, context); + continue; + } + range->set = root_signature->vk_set_count - root_signature->main_set; if (root_signature->use_descriptor_arrays) @@ -1014,6 +1150,7 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo context->current_binding = cur_binding; } + ++context->push_constant_index; } return S_OK; @@ -1084,9 +1221,36 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa } context->current_binding = cur_binding; + if (device->use_vk_heaps) + return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); + return S_OK; } +static void d3d12_root_signature_init_descriptor_table_push_constants(struct d3d12_root_signature *root_signature, + const struct vkd3d_descriptor_set_context *context) +{ + root_signature->descriptor_table_offset = 0; + if ((root_signature->descriptor_table_count = context->push_constant_index)) + { + VkPushConstantRange *range = &root_signature->push_constant_ranges[D3D12_SHADER_VISIBILITY_ALL]; + + root_signature->descriptor_table_offset = align(range->size, 16); + range->size = root_signature->descriptor_table_offset + + root_signature->descriptor_table_count * sizeof(uint32_t); + + if (range->size > root_signature->device->vk_info.device_limits.maxPushConstantsSize) + FIXME("Push constants size %u exceeds maximum allowed size %u. Try VKD3D_CONFIG=virtual_heaps.\n", + range->size, root_signature->device->vk_info.device_limits.maxPushConstantsSize); + + if (!root_signature->push_constant_range_count) + { + root_signature->push_constant_range_count = 1; + range->stageFlags = VK_SHADER_STAGE_ALL; + } + } +} + static bool vk_binding_uses_partial_binding(const VkDescriptorSetLayoutBinding *binding) { if (binding->descriptorCount == 1) @@ -1194,11 +1358,19 @@ static HRESULT vkd3d_create_pipeline_layout(struct d3d12_device *device, static unsigned int d3d12_root_signature_copy_descriptor_set_layouts(const struct d3d12_root_signature *root_signature, VkDescriptorSetLayout *vk_set_layouts) { + const struct d3d12_device *device = root_signature->device; + enum vkd3d_vk_descriptor_set_index set; unsigned int i; for (i = 0; i < root_signature->vk_set_count; ++i) vk_set_layouts[i] = root_signature->descriptor_set_layouts[i].vk_layout; + if (device->use_vk_heaps) + { + for (set = 0; set < ARRAY_SIZE(device->vk_descriptor_heap_layouts); ++set) + vk_set_layouts[i++] = device->vk_descriptor_heap_layouts[set].vk_set_layout; + } + return i; } @@ -1210,6 +1382,7 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa struct vkd3d_descriptor_set_context context; VkDescriptorSetLayoutBinding *binding_desc; struct d3d12_root_signature_info info; + bool use_vk_heaps; unsigned int i; HRESULT hr; @@ -1226,6 +1399,8 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa root_signature->flags = desc->Flags; root_signature->descriptor_mapping = NULL; root_signature->descriptor_offsets = NULL; + root_signature->uav_counter_mapping = NULL; + root_signature->uav_counter_offsets = NULL; root_signature->static_sampler_count = 0; root_signature->static_samplers = NULL; root_signature->device = device; @@ -1243,9 +1418,13 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa } root_signature->binding_count = info.binding_count; + root_signature->uav_mapping_count = info.uav_range_count; root_signature->static_sampler_count = desc->NumStaticSamplers; root_signature->root_descriptor_count = info.root_descriptor_count; root_signature->use_descriptor_arrays = device->vk_info.EXT_descriptor_indexing; + root_signature->descriptor_table_count = 0; + + use_vk_heaps = device->use_vk_heaps; hr = E_OUTOFMEMORY; root_signature->parameter_count = desc->NumParameters; @@ -1255,6 +1434,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa if (!(root_signature->descriptor_mapping = vkd3d_calloc(root_signature->binding_count, sizeof(*root_signature->descriptor_mapping)))) goto fail; + if (use_vk_heaps && (!(root_signature->uav_counter_mapping = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_mapping))) + || !(root_signature->uav_counter_offsets = vkd3d_calloc(root_signature->uav_mapping_count, + sizeof(*root_signature->uav_counter_offsets))))) + goto fail; if (root_signature->use_descriptor_arrays && !(root_signature->descriptor_offsets = vkd3d_calloc( root_signature->binding_count, sizeof(*root_signature->descriptor_offsets)))) goto fail; @@ -1289,8 +1473,11 @@ static HRESULT d3d12_root_signature_init(struct d3d12_root_signature *root_signa goto fail; if (FAILED(hr = d3d12_root_signature_init_static_samplers(root_signature, device, desc, &context))) goto fail; + context.push_constant_index = 0; if (FAILED(hr = d3d12_root_signature_init_root_descriptor_tables(root_signature, desc, &info, &context))) goto fail; + if (use_vk_heaps) + d3d12_root_signature_init_descriptor_table_push_constants(root_signature, &context); if (FAILED(hr = d3d12_root_signature_append_descriptor_set_layout(root_signature, &context, 0))) goto fail; @@ -1978,6 +2165,9 @@ static HRESULT d3d12_pipeline_state_find_and_init_uav_counters(struct d3d12_pipe HRESULT hr; int ret; + if (device->use_vk_heaps) + return S_OK; + shader_info.type = VKD3D_SHADER_STRUCTURE_TYPE_SCAN_DESCRIPTOR_INFO; shader_info.next = NULL; if ((ret = vkd3d_scan_dxbc(code, &shader_info)) < 0) @@ -2030,10 +2220,10 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = root_signature->descriptor_table_count; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; vkd3d_prepend_struct(&target_info, &offset_info); } @@ -2045,8 +2235,16 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st shader_interface.push_constant_buffer_count = root_signature->root_constant_count; shader_interface.combined_samplers = NULL; shader_interface.combined_sampler_count = 0; - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; + if (root_signature->uav_counter_mapping) + { + shader_interface.uav_counters = root_signature->uav_counter_mapping; + shader_interface.uav_counter_count = root_signature->uav_mapping_count; + } + else + { + shader_interface.uav_counters = state->uav_counters.bindings; + shader_interface.uav_counter_count = state->uav_counters.binding_count; + } vk_pipeline_layout = state->uav_counters.vk_pipeline_layout ? state->uav_counters.vk_pipeline_layout : root_signature->vk_pipeline_layout; @@ -2807,10 +3005,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s { offset_info.type = VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO; offset_info.next = NULL; - offset_info.descriptor_table_offset = 0; - offset_info.descriptor_table_count = 0; + offset_info.descriptor_table_offset = root_signature->descriptor_table_offset; + offset_info.descriptor_table_count = root_signature->descriptor_table_count; offset_info.binding_offsets = root_signature->descriptor_offsets; - offset_info.uav_counter_offsets = NULL; + offset_info.uav_counter_offsets = root_signature->uav_counter_offsets; } for (i = 0; i < ARRAY_SIZE(shader_stages); ++i) @@ -2852,8 +3050,10 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s break; case VK_SHADER_STAGE_FRAGMENT_BIT: - shader_interface.uav_counters = state->uav_counters.bindings; - shader_interface.uav_counter_count = state->uav_counters.binding_count; + shader_interface.uav_counters = root_signature->uav_counter_mapping + ? root_signature->uav_counter_mapping : state->uav_counters.bindings; + shader_interface.uav_counter_count = root_signature->uav_counter_mapping + ? root_signature->uav_mapping_count : state->uav_counters.binding_count; stage_target_info = &ps_target_info; break; diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index ac0d5ce1..3d5ce483 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -156,6 +156,7 @@ struct vkd3d_vulkan_info enum vkd3d_config_flags { VKD3D_CONFIG_FLAG_VULKAN_DEBUG = 0x00000001, + VKD3D_CONFIG_FLAG_VIRTUAL_HEAPS = 0x00000002, }; struct vkd3d_instance @@ -772,11 +773,51 @@ static inline struct d3d12_dsv_desc *d3d12_dsv_desc_from_cpu_handle(D3D12_CPU_DE void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_device *device, struct d3d12_resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc); +enum vkd3d_vk_descriptor_set_index +{ + VKD3D_SET_INDEX_UNIFORM_BUFFER = 0, + VKD3D_SET_INDEX_UNIFORM_TEXEL_BUFFER = 1, + VKD3D_SET_INDEX_SAMPLED_IMAGE = 2, + VKD3D_SET_INDEX_STORAGE_TEXEL_BUFFER = 3, + VKD3D_SET_INDEX_STORAGE_IMAGE = 4, + VKD3D_SET_INDEX_SAMPLER = 5, + VKD3D_SET_INDEX_UAV_COUNTER = 6, + VKD3D_SET_INDEX_COUNT = 7 +}; + +extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; + +static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( + VkDescriptorType type) +{ + assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); + + return vk_descriptor_set_index_table[type]; +} + +struct vkd3d_vk_descriptor_heap_layout +{ + VkDescriptorType type; + bool buffer_dimension; + D3D12_DESCRIPTOR_HEAP_TYPE applicable_heap_type; + unsigned int count; + VkDescriptorSetLayout vk_set_layout; +}; + +struct d3d12_descriptor_heap_vk_set +{ + VkDescriptorSet vk_set; + VkDescriptorImageInfo vk_image_info; + VkWriteDescriptorSet vk_descriptor_write; +}; + /* ID3D12DescriptorHeap */ struct d3d12_descriptor_heap { ID3D12DescriptorHeap ID3D12DescriptorHeap_iface; LONG refcount; + uint64_t serial_id; D3D12_DESCRIPTOR_HEAP_DESC desc; @@ -784,6 +825,10 @@ struct d3d12_descriptor_heap struct vkd3d_private_store private_store; + VkDescriptorPool vk_descriptor_pool; + struct d3d12_descriptor_heap_vk_set vk_descriptor_sets[VKD3D_SET_INDEX_COUNT]; + struct vkd3d_mutex vk_sets_mutex; + BYTE descriptors[]; }; @@ -898,8 +943,13 @@ struct d3d12_root_signature D3D12_ROOT_SIGNATURE_FLAGS flags; unsigned int binding_count; + unsigned int uav_mapping_count; struct vkd3d_shader_resource_binding *descriptor_mapping; struct vkd3d_shader_descriptor_offset *descriptor_offsets; + struct vkd3d_shader_uav_counter_binding *uav_counter_mapping; + struct vkd3d_shader_descriptor_offset *uav_counter_offsets; + unsigned int descriptor_table_offset; + unsigned int descriptor_table_count; unsigned int root_constant_count; struct vkd3d_shader_push_constant_buffer *root_constants; @@ -1118,6 +1168,8 @@ struct vkd3d_pipeline_bindings struct d3d12_desc *descriptor_tables[D3D12_MAX_ROOT_COST]; uint64_t descriptor_table_dirty_mask; uint64_t descriptor_table_active_mask; + uint64_t cbv_srv_uav_heap_id; + uint64_t sampler_heap_id; VkBufferView *vk_uav_counter_views; size_t vk_uav_counter_views_size; @@ -1179,6 +1231,8 @@ struct d3d12_command_list VkBuffer so_counter_buffers[D3D12_SO_BUFFER_SLOT_COUNT]; VkDeviceSize so_counter_buffer_offsets[D3D12_SO_BUFFER_SLOT_COUNT]; + void (*update_descriptors)(struct d3d12_command_list *list, enum vkd3d_pipeline_bind_point bind_point); + struct vkd3d_private_store private_store; }; @@ -1370,6 +1424,8 @@ struct d3d12_device struct vkd3d_uav_clear_state uav_clear_state; VkDescriptorPoolSize vk_pool_sizes[VKD3D_DESCRIPTOR_POOL_COUNT]; + struct vkd3d_vk_descriptor_heap_layout vk_descriptor_heap_layouts[VKD3D_SET_INDEX_COUNT]; + bool use_vk_heaps; }; HRESULT d3d12_device_create(struct vkd3d_instance *instance, diff --git a/tests/d3d12.c b/tests/d3d12.c index 997b544f..29bd40c8 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -16758,13 +16758,15 @@ static void test_update_descriptor_tables(void) destroy_test_context(&context); } -/* This cannot be implemented reasonably in Vulkan. Vulkan doesn't allow - * updating descriptor sets after the vkCmdBindDescriptorSets() command - * is recorded. +/* This requires the Vulkan descriptor indexing extension and Vulkan-backed + * descriptor heaps. Vulkan doesn't allow updating descriptor sets after the + * vkCmdBindDescriptorSets() command is recorded unless the update-after-bind + * feature of descriptor indexing is used. */ static void test_update_descriptor_heap_after_closing_command_list(void) { ID3D12Resource *red_texture, *green_texture; + D3D12_RESOURCE_BINDING_TIER binding_tier; ID3D12GraphicsCommandList *command_list; D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle; ID3D12DescriptorHeap *cpu_heap, *heap; @@ -16814,6 +16816,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) command_list = context.list; queue = context.queue; + binding_tier = get_resource_binding_tier(context.device); + context.root_signature = create_texture_root_signature(context.device, D3D12_SHADER_VISIBILITY_PIXEL, 0, 0); context.pipeline_state = create_pipeline_state(context.device, @@ -16875,7 +16879,8 @@ static void test_update_descriptor_heap_after_closing_command_list(void) D3D12_RESOURCE_STATE_RENDER_TARGET, D3D12_RESOURCE_STATE_COPY_SOURCE); get_texture_readback_with_command_list(context.render_target, 0, &rb, queue, command_list); value = get_readback_uint(&rb, 0, 0, 0); - todo ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); + todo_if(binding_tier < D3D12_RESOURCE_BINDING_TIER_3) + ok(value == 0xff00ff00, "Got unexpected value %#x.\n", value); release_resource_readback(&rb); ID3D12DescriptorHeap_Release(cpu_heap);