From 126a789019ae0450521c456f299a8c7b5c3c7a63 Mon Sep 17 00:00:00 2001 From: Philip Rebohle Date: Mon, 25 Nov 2019 17:35:37 +0330 Subject: [PATCH] vkd3d: Implement d3d12_command_list_ClearUnorderedAccessViewFloat(). Signed-off-by: Philip Rebohle Signed-off-by: Henri Verbeet Signed-off-by: Alexandre Julliard --- Makefile.am | 1 + libs/vkd3d/command.c | 184 +++++++++++++++++- libs/vkd3d/device.c | 6 + libs/vkd3d/state.c | 222 ++++++++++++++++++--- libs/vkd3d/vkd3d_private.h | 38 ++++ libs/vkd3d/vkd3d_shaders.h | 388 +++++++++++++++++++++++++++++++++++++ tests/d3d12.c | 48 ++--- 7 files changed, 836 insertions(+), 51 deletions(-) create mode 100644 libs/vkd3d/vkd3d_shaders.h diff --git a/Makefile.am b/Makefile.am index 2b0e8f3f..00a5f589 100644 --- a/Makefile.am +++ b/Makefile.am @@ -108,6 +108,7 @@ libvkd3d_la_SOURCES = \ libs/vkd3d/vkd3d.map \ libs/vkd3d/vkd3d_main.c \ libs/vkd3d/vkd3d_private.h \ + libs/vkd3d/vkd3d_shaders.h \ libs/vkd3d/vulkan_procs.h \ libs/vkd3d_version.c libvkd3d_la_LDFLAGS = $(AM_LDFLAGS) -version-info 2:0:1 diff --git a/libs/vkd3d/command.c b/libs/vkd3d/command.c index 297054b3..ade51bad 100644 --- a/libs/vkd3d/command.c +++ b/libs/vkd3d/command.c @@ -4803,6 +4803,182 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12Gra &clear_value, rect_count, rects); } +struct vkd3d_uav_clear_pipeline +{ + VkDescriptorSetLayout vk_set_layout; + VkPipelineLayout vk_pipeline_layout; + VkPipeline vk_pipeline; + VkExtent3D group_size; +}; + +static void vkd3d_uav_clear_state_get_buffer_pipeline(const struct vkd3d_uav_clear_state *state, + enum vkd3d_format_type format_type, struct vkd3d_uav_clear_pipeline *info) +{ + const struct vkd3d_uav_clear_pipelines *pipelines; + + pipelines = format_type == VKD3D_FORMAT_TYPE_UINT ? &state->pipelines_uint : &state->pipelines_float; + info->vk_set_layout = state->vk_set_layout_buffer; + info->vk_pipeline_layout = state->vk_pipeline_layout_buffer; + info->vk_pipeline = pipelines->buffer; + info->group_size = (VkExtent3D){128, 1, 1}; +} + +static void vkd3d_uav_clear_state_get_image_pipeline(const struct vkd3d_uav_clear_state *state, + VkImageViewType image_view_type, enum vkd3d_format_type format_type, struct vkd3d_uav_clear_pipeline *info) +{ + const struct vkd3d_uav_clear_pipelines *pipelines; + + pipelines = format_type == VKD3D_FORMAT_TYPE_UINT ? &state->pipelines_uint : &state->pipelines_float; + info->vk_set_layout = state->vk_set_layout_image; + info->vk_pipeline_layout = state->vk_pipeline_layout_image; + + switch (image_view_type) + { + case VK_IMAGE_VIEW_TYPE_1D: + info->vk_pipeline = pipelines->image_1d; + info->group_size = (VkExtent3D){64, 1, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_1D_ARRAY: + info->vk_pipeline = pipelines->image_1d_array; + info->group_size = (VkExtent3D){64, 1, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_2D: + info->vk_pipeline = pipelines->image_2d; + info->group_size = (VkExtent3D){8, 8, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_2D_ARRAY: + info->vk_pipeline = pipelines->image_2d_array; + info->group_size = (VkExtent3D){8, 8, 1}; + break; + + case VK_IMAGE_VIEW_TYPE_3D: + info->vk_pipeline = pipelines->image_3d; + info->group_size = (VkExtent3D){8, 8, 1}; + break; + + default: + ERR("Unhandled view type %#x.\n", image_view_type); + info->vk_pipeline = VK_NULL_HANDLE; + info->group_size = (VkExtent3D){0, 0, 0}; + break; + } +} + +static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, + struct d3d12_resource *resource, struct vkd3d_view *view, const VkClearColorValue *clear_colour, + unsigned int rect_count, const D3D12_RECT *rects) +{ + const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; + unsigned int i, miplevel_idx, layer_count; + struct vkd3d_uav_clear_pipeline pipeline; + struct vkd3d_uav_clear_args clear_args; + VkDescriptorImageInfo image_info; + D3D12_RECT full_rect, curr_rect; + VkWriteDescriptorSet write_set; + + d3d12_command_list_track_resource_usage(list, resource); + d3d12_command_list_end_current_render_pass(list); + + d3d12_command_list_invalidate_current_pipeline(list); + d3d12_command_list_invalidate_bindings(list, list->state); + d3d12_command_list_invalidate_root_parameters(list, VK_PIPELINE_BIND_POINT_COMPUTE); + + if (!d3d12_command_allocator_add_view(list->allocator, view)) + WARN("Failed to add view.\n"); + + clear_args.colour = *clear_colour; + + write_set.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + write_set.pNext = NULL; + write_set.dstBinding = 0; + write_set.dstArrayElement = 0; + write_set.descriptorCount = 1; + + if (d3d12_resource_is_buffer(resource)) + { + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER; + write_set.pImageInfo = NULL; + write_set.pBufferInfo = NULL; + write_set.pTexelBufferView = &view->u.vk_buffer_view; + + miplevel_idx = 0; + layer_count = 1; + vkd3d_uav_clear_state_get_buffer_pipeline(&list->device->uav_clear_state, + view->format->type, &pipeline); + } + else + { + image_info.sampler = VK_NULL_HANDLE; + image_info.imageView = view->u.vk_image_view; + image_info.imageLayout = VK_IMAGE_LAYOUT_GENERAL; + + write_set.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; + write_set.pImageInfo = &image_info; + write_set.pBufferInfo = NULL; + write_set.pTexelBufferView = NULL; + + miplevel_idx = view->info.texture.miplevel_idx; + layer_count = view->info.texture.vk_view_type == VK_IMAGE_VIEW_TYPE_3D + ? d3d12_resource_desc_get_depth(&resource->desc, miplevel_idx) + : view->info.texture.layer_count; + vkd3d_uav_clear_state_get_image_pipeline(&list->device->uav_clear_state, + view->info.texture.vk_view_type, view->format->type, &pipeline); + } + + if (!(write_set.dstSet = d3d12_command_allocator_allocate_descriptor_set( + list->allocator, pipeline.vk_set_layout))) + { + ERR("Failed to allocate descriptor set.\n"); + return; + } + + VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &write_set, 0, NULL)); + + full_rect.left = 0; + full_rect.right = d3d12_resource_desc_get_width(&resource->desc, miplevel_idx); + full_rect.top = 0; + full_rect.bottom = d3d12_resource_desc_get_height(&resource->desc, miplevel_idx); + + if (!rect_count) + { + rects = &full_rect; + rect_count = 1; + } + + VK_CALL(vkCmdBindPipeline(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline.vk_pipeline)); + + VK_CALL(vkCmdBindDescriptorSets(list->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, + pipeline.vk_pipeline_layout, 0, 1, &write_set.dstSet, 0, NULL)); + + for (i = 0; i < rect_count; ++i) + { + /* Clamp to the actual resource region and skip empty rectangles. */ + curr_rect.left = max(rects[i].left, full_rect.left); + curr_rect.top = max(rects[i].top, full_rect.top); + curr_rect.right = min(rects[i].right, full_rect.right); + curr_rect.bottom = min(rects[i].bottom, full_rect.bottom); + + if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) + continue; + + clear_args.offset.x = curr_rect.left; + clear_args.offset.y = curr_rect.top; + clear_args.extent.width = curr_rect.right - curr_rect.left; + clear_args.extent.height = curr_rect.bottom - curr_rect.top; + + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); + + VK_CALL(vkCmdDispatch(list->vk_command_buffer, + vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width), + vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height), + vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth))); + } +} + static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList1 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) @@ -4906,13 +5082,17 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(I { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList1(iface); struct d3d12_resource *resource_impl; + VkClearColorValue colour; + struct vkd3d_view *view; - FIXME("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p stub!\n", + TRACE("iface %p, gpu_handle %#"PRIx64", cpu_handle %lx, resource %p, values %p, rect_count %u, rects %p.\n", iface, gpu_handle.ptr, cpu_handle.ptr, resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); + view = d3d12_desc_from_cpu_handle(cpu_handle)->u.view; + memcpy(colour.float32, values, sizeof(colour.float32)); - d3d12_command_list_track_resource_usage(list, resource_impl); + d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); } static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList1 *iface, diff --git a/libs/vkd3d/device.c b/libs/vkd3d/device.c index 06243188..59859add 100644 --- a/libs/vkd3d/device.c +++ b/libs/vkd3d/device.c @@ -2154,6 +2154,7 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device *iface) vkd3d_private_store_destroy(&device->private_store); vkd3d_cleanup_format_info(device); + vkd3d_uav_clear_state_cleanup(&device->uav_clear_state, device); vkd3d_destroy_null_resources(&device->null_resources, device); vkd3d_gpu_va_allocator_cleanup(&device->gpu_va_allocator); vkd3d_render_pass_cache_cleanup(&device->render_pass_cache, device); @@ -3447,6 +3448,9 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, if (FAILED(hr = vkd3d_init_null_resources(&device->null_resources, device))) goto out_cleanup_format_info; + if (FAILED(hr = vkd3d_uav_clear_state_init(&device->uav_clear_state, device))) + goto out_destroy_null_resources; + vkd3d_render_pass_cache_init(&device->render_pass_cache); vkd3d_gpu_va_allocator_init(&device->gpu_va_allocator); @@ -3458,6 +3462,8 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, return S_OK; +out_destroy_null_resources: + vkd3d_destroy_null_resources(&device->null_resources, device); out_cleanup_format_info: vkd3d_cleanup_format_info(device); out_stop_fence_worker: diff --git a/libs/vkd3d/state.c b/libs/vkd3d/state.c index a321fa47..e1f7da98 100644 --- a/libs/vkd3d/state.c +++ b/libs/vkd3d/state.c @@ -18,6 +18,7 @@ */ #include "vkd3d_private.h" +#include "vkd3d_shaders.h" /* ID3D12RootSignature */ static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) @@ -1384,6 +1385,37 @@ static HRESULT create_shader_stage(struct d3d12_device *device, return S_OK; } +static HRESULT vkd3d_create_compute_pipeline(struct d3d12_device *device, + const D3D12_SHADER_BYTECODE *code, const struct vkd3d_shader_interface_info *shader_interface, + VkPipelineLayout vk_pipeline_layout, VkPipeline *vk_pipeline) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + VkComputePipelineCreateInfo pipeline_info; + VkResult vr; + HRESULT hr; + + pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + pipeline_info.pNext = NULL; + pipeline_info.flags = 0; + if (FAILED(hr = create_shader_stage(device, &pipeline_info.stage, + VK_SHADER_STAGE_COMPUTE_BIT, code, shader_interface, NULL))) + return hr; + pipeline_info.layout = vk_pipeline_layout; + pipeline_info.basePipelineHandle = VK_NULL_HANDLE; + pipeline_info.basePipelineIndex = -1; + + vr = VK_CALL(vkCreateComputePipelines(device->vk_device, + VK_NULL_HANDLE, 1, &pipeline_info, NULL, vk_pipeline)); + VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL)); + if (vr < 0) + { + WARN("Failed to create Vulkan compute pipeline, hr %#x.", hr); + return hresult_from_vk_result(vr); + } + + return S_OK; +} + static HRESULT d3d12_pipeline_state_init_compute_uav_counters(struct d3d12_pipeline_state *state, struct d3d12_device *device, const struct d3d12_root_signature *root_signature, const struct vkd3d_shader_scan_info *shader_info) @@ -1470,10 +1502,9 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; struct vkd3d_shader_interface_info shader_interface; const struct d3d12_root_signature *root_signature; - VkComputePipelineCreateInfo pipeline_info; struct vkd3d_shader_scan_info shader_info; + VkPipelineLayout vk_pipeline_layout; struct vkd3d_shader_code dxbc; - VkResult vr; HRESULT hr; int ret; @@ -1519,12 +1550,12 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st shader_interface.uav_counters = state->uav_counters; shader_interface.uav_counter_count = vkd3d_popcount(state->uav_counter_mask); - pipeline_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; - pipeline_info.pNext = NULL; - pipeline_info.flags = 0; - if (FAILED(hr = create_shader_stage(device, &pipeline_info.stage, - VK_SHADER_STAGE_COMPUTE_BIT, &desc->CS, &shader_interface, NULL))) + vk_pipeline_layout = state->vk_pipeline_layout + ? state->vk_pipeline_layout : root_signature->vk_pipeline_layout; + if (FAILED(hr = vkd3d_create_compute_pipeline(device, &desc->CS, &shader_interface, + vk_pipeline_layout, &state->u.compute.vk_pipeline))) { + WARN("Failed to create Vulkan compute pipeline, hr %#x.\n", hr); if (state->vk_set_layout) VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->vk_set_layout, NULL)); if (state->vk_pipeline_layout) @@ -1532,24 +1563,6 @@ static HRESULT d3d12_pipeline_state_init_compute(struct d3d12_pipeline_state *st vkd3d_free(state->uav_counters); return hr; } - pipeline_info.layout = state->vk_pipeline_layout - ? state->vk_pipeline_layout : root_signature->vk_pipeline_layout; - pipeline_info.basePipelineHandle = VK_NULL_HANDLE; - pipeline_info.basePipelineIndex = -1; - - vr = VK_CALL(vkCreateComputePipelines(device->vk_device, VK_NULL_HANDLE, - 1, &pipeline_info, NULL, &state->u.compute.vk_pipeline)); - VK_CALL(vkDestroyShaderModule(device->vk_device, pipeline_info.stage.module, NULL)); - if (vr) - { - WARN("Failed to create Vulkan compute pipeline, vr %d.\n", vr); - if (state->vk_set_layout) - VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->vk_set_layout, NULL)); - if (state->vk_pipeline_layout) - VK_CALL(vkDestroyPipelineLayout(device->vk_device, state->vk_pipeline_layout, NULL)); - vkd3d_free(state->uav_counters); - return hresult_from_vk_result(vr); - } if (FAILED(hr = vkd3d_private_store_init(&state->private_store))) { @@ -2802,3 +2815,162 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta ERR("Could not get the pipeline compiled by other thread from the cache.\n"); return vk_pipeline; } + +static void vkd3d_uav_clear_pipelines_cleanup(struct vkd3d_uav_clear_pipelines *pipelines, + struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_3d, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_2d_array, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_2d, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_1d_array, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->image_1d, NULL)); + VK_CALL(vkDestroyPipeline(device->vk_device, pipelines->buffer, NULL)); +} + +void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device) +{ + const struct vkd3d_vk_device_procs *vk_procs = &device->vk_procs; + + vkd3d_uav_clear_pipelines_cleanup(&state->pipelines_uint, device); + vkd3d_uav_clear_pipelines_cleanup(&state->pipelines_float, device); + + VK_CALL(vkDestroyPipelineLayout(device->vk_device, state->vk_pipeline_layout_image, NULL)); + VK_CALL(vkDestroyPipelineLayout(device->vk_device, state->vk_pipeline_layout_buffer, NULL)); + + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->vk_set_layout_image, NULL)); + VK_CALL(vkDestroyDescriptorSetLayout(device->vk_device, state->vk_set_layout_buffer, NULL)); +} + +HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device) +{ + struct vkd3d_shader_push_constant_buffer push_constant; + struct vkd3d_shader_interface_info shader_interface; + struct vkd3d_shader_resource_binding binding; + VkDescriptorSetLayoutBinding set_binding; + VkPushConstantRange push_constant_range; + unsigned int i; + HRESULT hr; + + const struct + { + VkDescriptorSetLayout *set_layout; + VkPipelineLayout *pipeline_layout; + VkDescriptorType descriptor_type; + } + set_layouts[] = + { + {&state->vk_set_layout_buffer, &state->vk_pipeline_layout_buffer, VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER}, + {&state->vk_set_layout_image, &state->vk_pipeline_layout_image, VK_DESCRIPTOR_TYPE_STORAGE_IMAGE}, + }; + + const struct + { + VkPipeline *pipeline; + VkPipelineLayout *pipeline_layout; + D3D12_SHADER_BYTECODE code; + } + pipelines[] = + { +#define SHADER_CODE(name) {name, sizeof(name)} + {&state->pipelines_float.buffer, &state->vk_pipeline_layout_buffer, + SHADER_CODE(cs_uav_clear_buffer_float_code)}, + {&state->pipelines_float.image_1d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_float_code)}, + {&state->pipelines_float.image_1d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_array_float_code)}, + {&state->pipelines_float.image_2d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_float_code)}, + {&state->pipelines_float.image_2d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_array_float_code)}, + {&state->pipelines_float.image_3d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_3d_float_code)}, + + {&state->pipelines_uint.buffer, &state->vk_pipeline_layout_buffer, + SHADER_CODE(cs_uav_clear_buffer_uint_code)}, + {&state->pipelines_uint.image_1d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_uint_code)}, + {&state->pipelines_uint.image_1d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_1d_array_uint_code)}, + {&state->pipelines_uint.image_2d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_uint_code)}, + {&state->pipelines_uint.image_2d_array, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_2d_array_uint_code)}, + {&state->pipelines_uint.image_3d, &state->vk_pipeline_layout_image, + SHADER_CODE(cs_uav_clear_3d_uint_code)}, +#undef SHADER_CODE + }; + + memset(state, 0, sizeof(*state)); + + set_binding.binding = 0; + set_binding.descriptorCount = 1; + set_binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + set_binding.pImmutableSamplers = NULL; + + binding.type = VKD3D_SHADER_DESCRIPTOR_TYPE_UAV; + binding.register_index = 0; + binding.shader_visibility = VKD3D_SHADER_VISIBILITY_COMPUTE; + binding.binding.set = 0; + binding.binding.binding = 0; + + push_constant_range.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + push_constant_range.offset = 0; + push_constant_range.size = sizeof(struct vkd3d_uav_clear_args); + + push_constant.register_index = 0; + push_constant.shader_visibility = VKD3D_SHADER_VISIBILITY_COMPUTE; + push_constant.offset = 0; + push_constant.size = sizeof(struct vkd3d_uav_clear_args); + + for (i = 0; i < ARRAY_SIZE(set_layouts); ++i) + { + set_binding.descriptorType = set_layouts[i].descriptor_type; + + if (FAILED(hr = vkd3d_create_descriptor_set_layout(device, 0, 1, &set_binding, set_layouts[i].set_layout))) + { + ERR("Failed to create descriptor set layout %u, hr %#x.", i, hr); + goto fail; + } + + if (FAILED(hr = vkd3d_create_pipeline_layout(device, 1, set_layouts[i].set_layout, + 1, &push_constant_range, set_layouts[i].pipeline_layout))) + { + ERR("Failed to create pipeline layout %u, hr %#x.", i, hr); + goto fail; + } + } + + shader_interface.type = VKD3D_SHADER_STRUCTURE_TYPE_SHADER_INTERFACE_INFO; + shader_interface.next = NULL; + shader_interface.bindings = &binding; + shader_interface.binding_count = 1; + shader_interface.push_constant_buffers = &push_constant; + shader_interface.push_constant_buffer_count = 1; + shader_interface.combined_samplers = NULL; + shader_interface.combined_sampler_count = 0; + shader_interface.uav_counters = NULL; + shader_interface.uav_counter_count = 0; + + for (i = 0; i < ARRAY_SIZE(pipelines); ++i) + { + if (pipelines[i].pipeline_layout == &state->vk_pipeline_layout_buffer) + binding.flags = VKD3D_SHADER_BINDING_FLAG_BUFFER; + else + binding.flags = VKD3D_SHADER_BINDING_FLAG_IMAGE; + + if (FAILED(hr = vkd3d_create_compute_pipeline(device, &pipelines[i].code, &shader_interface, + *pipelines[i].pipeline_layout, pipelines[i].pipeline))) + { + ERR("Failed to create compute pipeline %u, hr %#x.", i, hr); + goto fail; + } + } + + return S_OK; + +fail: + vkd3d_uav_clear_state_cleanup(state, device); + return hr; +} diff --git a/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/vkd3d_private.h index c6bfe964..933c7c46 100644 --- a/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/vkd3d_private.h @@ -1059,6 +1059,38 @@ struct vkd3d_format_compatibility_list VkFormat vk_formats[VKD3D_MAX_COMPATIBLE_FORMAT_COUNT]; }; +struct vkd3d_uav_clear_args +{ + VkClearColorValue colour; + VkOffset2D offset; + VkExtent2D extent; +}; + +struct vkd3d_uav_clear_pipelines +{ + VkPipeline buffer; + VkPipeline image_1d; + VkPipeline image_1d_array; + VkPipeline image_2d; + VkPipeline image_2d_array; + VkPipeline image_3d; +}; + +struct vkd3d_uav_clear_state +{ + VkDescriptorSetLayout vk_set_layout_buffer; + VkDescriptorSetLayout vk_set_layout_image; + + VkPipelineLayout vk_pipeline_layout_buffer; + VkPipelineLayout vk_pipeline_layout_image; + + struct vkd3d_uav_clear_pipelines pipelines_float; + struct vkd3d_uav_clear_pipelines pipelines_uint; +}; + +HRESULT vkd3d_uav_clear_state_init(struct vkd3d_uav_clear_state *state, struct d3d12_device *device) DECLSPEC_HIDDEN; +void vkd3d_uav_clear_state_cleanup(struct vkd3d_uav_clear_state *state, struct d3d12_device *device) DECLSPEC_HIDDEN; + /* ID3D12Device */ struct d3d12_device { @@ -1104,6 +1136,7 @@ struct d3d12_device unsigned int format_compatibility_list_count; const struct vkd3d_format_compatibility_list *format_compatibility_lists; struct vkd3d_null_resources null_resources; + struct vkd3d_uav_clear_state uav_clear_state; }; HRESULT d3d12_device_create(struct vkd3d_instance *instance, @@ -1237,6 +1270,11 @@ static inline unsigned int d3d12_resource_desc_get_sub_resource_count(const D3D1 return d3d12_resource_desc_get_layer_count(desc) * desc->MipLevels; } +static inline unsigned int vkd3d_compute_workgroup_count(unsigned int thread_count, unsigned int workgroup_size) +{ + return (thread_count + workgroup_size - 1) / workgroup_size; +} + VkCompareOp vk_compare_op_from_d3d12(D3D12_COMPARISON_FUNC op) DECLSPEC_HIDDEN; VkSampleCountFlagBits vk_samples_from_dxgi_sample_desc(const DXGI_SAMPLE_DESC *desc) DECLSPEC_HIDDEN; VkSampleCountFlagBits vk_samples_from_sample_count(unsigned int sample_count) DECLSPEC_HIDDEN; diff --git a/libs/vkd3d/vkd3d_shaders.h b/libs/vkd3d/vkd3d_shaders.h new file mode 100644 index 00000000..b2a90cdb --- /dev/null +++ b/libs/vkd3d/vkd3d_shaders.h @@ -0,0 +1,388 @@ +/* + * Copyright 2019 Philip Rebohle + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADERS_H +#define __VKD3D_SHADERS_H + +static const uint32_t cs_uav_clear_buffer_float_code[] = +{ +#if 0 + RWBuffer dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0xe114ba61, 0xff6a0d0b, 0x7b25c8f4, 0xfcf7cf22, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_buffer_uint_code[] = +{ +#if 0 + RWBuffer dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(128, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x3afd0cfd, 0x5145c166, 0x5b9f76b8, 0xa73775cd, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400089c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000080, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_array_float_code[] = +{ +#if 0 + RWTexture1DArray dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x3d73bc2d, 0x2b635f3d, 0x6bf98e92, 0xbe0aa5d9, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_array_uint_code[] = +{ +#if 0 + RWTexture1DArray dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[int2(u_info.dst_offset.x + thread_id.x, thread_id.y)] = u_info.clear_value; + } +#endif + 0x43425844, 0x2f0ca457, 0x72068b34, 0xd9dadc2b, 0xd3178c3e, 0x00000001, 0x0000011c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000c8, 0x00050050, 0x00000032, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400389c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x04000036, 0x001000e2, 0x00000000, 0x00020556, 0x080000a4, 0x0011e0f2, 0x00000000, + 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_float_code[] = +{ +#if 0 + RWTexture1D dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x05266503, 0x4b97006f, 0x01a5cc63, 0xe617d0a1, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_1d_uint_code[] = +{ +#if 0 + RWTexture1D dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(64, 1, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (thread_id.x < u_info.dst_extent.x) + dst[u_info.dst_offset.x + thread_id.x] = u_info.clear_value; + } +#endif + 0x43425844, 0x19d5c8f2, 0x3ca4ac24, 0x9e258499, 0xf0463fd6, 0x00000001, 0x0000010c, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000b8, 0x00050050, 0x0000002e, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400109c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020012, 0x02000068, 0x00000001, 0x0400009b, 0x00000040, 0x00000001, 0x00000001, + 0x07000022, 0x00100012, 0x00000000, 0x0002000a, 0x0020802a, 0x00000000, 0x00000001, 0x0304001f, + 0x0010000a, 0x00000000, 0x0700001e, 0x00100012, 0x00000000, 0x0002000a, 0x0020800a, 0x00000000, + 0x00000001, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100006, 0x00000000, 0x00208e46, 0x00000000, + 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_float_code[] = +{ +#if 0 + RWTexture2DArray dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0x924d2d2c, 0xb9166376, 0x99f83871, 0x8ef65025, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_array_uint_code[] = +{ +#if 0 + RWTexture2DArray dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy + thread_id.xy, thread_id.z)] = u_info.clear_value; + } +#endif + 0x43425844, 0xa92219d4, 0xa2c5e47d, 0x0d308500, 0xf32197b4, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400409c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_float_code[] = +{ +#if 0 + RWTexture2D dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0x6e735b3f, 0x7348c4fa, 0xb3634e42, 0x50e2d99b, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_2d_uint_code[] = +{ +#if 0 + RWTexture2D dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[u_info.dst_offset.xy + thread_id.xy] = u_info.clear_value; + } +#endif + 0x43425844, 0xf01db5dd, 0xc7dc5e55, 0xb017c1a8, 0x55abd52d, 0x00000001, 0x00000128, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000d4, 0x00050050, 0x00000035, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400189c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020032, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x001000f2, 0x00000000, 0x00020546, 0x00208546, 0x00000000, 0x00000001, + 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, 0x00000000, 0x00208e46, 0x00000000, 0x00000000, + 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_float_code[] = +{ +#if 0 + RWTexture3D dst; + + struct + { + float4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5d8f36a0, 0x30fa86a5, 0xfec7f2ef, 0xdfd76cbb, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00005555, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +static const uint32_t cs_uav_clear_3d_uint_code[] = +{ +#if 0 + RWTexture3D dst; + + struct + { + uint4 clear_value; + int2 dst_offset; + int2 dst_extent; + } u_info; + + [numthreads(8, 8, 1)] + void main(int3 thread_id : SV_DispatchThreadID) + { + if (all(thread_id.xy < u_info.dst_extent.xy)) + dst[int3(u_info.dst_offset.xy, 0) + thread_id.xyz] = u_info.clear_value; + } +#endif + 0x43425844, 0x5b9c95b1, 0xc9bde4e3, 0x9aaff806, 0x24a1d264, 0x00000001, 0x00000138, 0x00000003, + 0x0000002c, 0x0000003c, 0x0000004c, 0x4e475349, 0x00000008, 0x00000000, 0x00000008, 0x4e47534f, + 0x00000008, 0x00000000, 0x00000008, 0x58454853, 0x000000e4, 0x00050050, 0x00000039, 0x0100086a, + 0x04000059, 0x00208e46, 0x00000000, 0x00000002, 0x0400289c, 0x0011e000, 0x00000000, 0x00004444, + 0x0200005f, 0x00020072, 0x02000068, 0x00000001, 0x0400009b, 0x00000008, 0x00000008, 0x00000001, + 0x07000022, 0x00100032, 0x00000000, 0x00020046, 0x00208ae6, 0x00000000, 0x00000001, 0x07000001, + 0x00100012, 0x00000000, 0x0010001a, 0x00000000, 0x0010000a, 0x00000000, 0x0304001f, 0x0010000a, + 0x00000000, 0x0700001e, 0x00100032, 0x00000000, 0x00020046, 0x00208046, 0x00000000, 0x00000001, + 0x04000036, 0x001000c2, 0x00000000, 0x00020aa6, 0x080000a4, 0x0011e0f2, 0x00000000, 0x00100e46, + 0x00000000, 0x00208e46, 0x00000000, 0x00000000, 0x01000015, 0x0100003e, +}; + +#endif /* __VKD3D_SHADERS_H */ diff --git a/tests/d3d12.c b/tests/d3d12.c index 5284138d..3b5264f8 100644 --- a/tests/d3d12.c +++ b/tests/d3d12.c @@ -4812,7 +4812,7 @@ static void test_clear_unordered_access_view_buffer(void) {DXGI_FORMAT_R32_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, {1, 0, 0, 0}, 1, false, true}, {DXGI_FORMAT_R32_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, - {0x3f800000 /* 1.0f */, 0, 0, 0}, 0x3f800000 /* 1.0f */, true, true}, + {0x3f800000 /* 1.0f */, 0, 0, 0}, 0x3f800000 /* 1.0f */, true}, {DXGI_FORMAT_R16G16_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, {0x1234, 0xabcd, 0, 0}, 0xabcd1234, false, true}, @@ -4822,16 +4822,16 @@ static void test_clear_unordered_access_view_buffer(void) {DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, {0x1234, 0xabcd, 0, 0}, 0xabcd1234, false, true}, {DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, - {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0xffff8000, true, true}, + {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0xffff8000, true}, {DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, - {0x40000000 /* 2.0f */, 0 /* 0.0f */, 0, 0}, 0x0000ffff, true, true}, + {0x40000000 /* 2.0f */, 0 /* 0.0f */, 0, 0}, 0x0000ffff, true}, {DXGI_FORMAT_R16G16_UNORM, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, - {0xbf800000 /* -1.0f */, 0 /* 0.0f */, 0x3f000000 /* 1.0f */, 0x3f000000 /* 1.0f */}, 0, true, true}, + {0xbf800000 /* -1.0f */, 0 /* 0.0f */, 0x3f000000 /* 1.0f */, 0x3f000000 /* 1.0f */}, 0, true}, {DXGI_FORMAT_R16G16_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, {0x1234, 0xabcd, 0, 0}, 0xabcd1234, false, true}, {DXGI_FORMAT_R16G16_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, - {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0x3c003800, true, true}, + {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0x3c003800, true}, {DXGI_FORMAT_R8G8B8A8_UINT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, {0x11, 0x22, 0x33, 0x44}, 0x44332211, false, true}, @@ -4845,10 +4845,10 @@ static void test_clear_unordered_access_view_buffer(void) {DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, {0x7ff, 0, 0x3ff, 0}, 0xffc007ff, false, true}, {DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, - {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0x40000000 /* 2.0f */, 0}, 0x801e0380, true, true}, + {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0x40000000 /* 2.0f */, 0}, 0x801e0380, true}, {DXGI_FORMAT_R11G11B10_FLOAT, { 0, BUFFER_SIZE / sizeof(uint32_t), 0, 0, D3D12_BUFFER_UAV_FLAG_NONE}, {0x3f000000 /* 1.0f */, 0 /* 0.0f */, 0xbf800000 /* -1.0f */, 0x3f000000 /* 1.0f */}, - 0x00000380, true, true}, + 0x00000380, true}, }; memset(&desc, 0, sizeof(desc)); @@ -4982,48 +4982,48 @@ static void test_clear_unordered_access_view_image(void) tests[] = { /* Test clearing a specific mip level. */ - {DXGI_FORMAT_R32_FLOAT, 2, 1, 0, 0, 1, 0, {}, {1, 0, 0, 0}, 1, false, false}, - {DXGI_FORMAT_R32_FLOAT, 2, 1, 1, 0, 1, 0, {}, {1, 0, 0, 0}, 1, false, false}, - {DXGI_FORMAT_R32_FLOAT, 2, 1, 0, 0, 1, 0, {}, {0x3f000000, 0, 0, 0}, 0x3f000000, true, true}, - {DXGI_FORMAT_R32_FLOAT, 2, 1, 1, 0, 1, 0, {}, {0x3f000000, 0, 0, 0}, 0x3f000000, true, true}, + {DXGI_FORMAT_R32_FLOAT, 2, 1, 0, 0, 1, 0, {}, {1, 0, 0, 0}, 1}, + {DXGI_FORMAT_R32_FLOAT, 2, 1, 1, 0, 1, 0, {}, {1, 0, 0, 0}, 1}, + {DXGI_FORMAT_R32_FLOAT, 2, 1, 0, 0, 1, 0, {}, {0x3f000000, 0, 0, 0}, 0x3f000000, true}, + {DXGI_FORMAT_R32_FLOAT, 2, 1, 1, 0, 1, 0, {}, {0x3f000000, 0, 0, 0}, 0x3f000000, true}, /* Test clearing specific array layers. */ - {DXGI_FORMAT_R32_FLOAT, 1, IMAGE_SIZE, 0, 0, IMAGE_SIZE, 0, {}, {1, 0, 0, 0}, 1, false, false}, - {DXGI_FORMAT_R32_FLOAT, 1, IMAGE_SIZE, 0, 3, 2, 0, {}, {1, 0, 0, 0}, 1, false, false}, + {DXGI_FORMAT_R32_FLOAT, 1, IMAGE_SIZE, 0, 0, IMAGE_SIZE, 0, {}, {1, 0, 0, 0}, 1}, + {DXGI_FORMAT_R32_FLOAT, 1, IMAGE_SIZE, 0, 3, 2, 0, {}, {1, 0, 0, 0}, 1}, {DXGI_FORMAT_R32_FLOAT, 1, IMAGE_SIZE, 0, 0, IMAGE_SIZE, 0, {}, - {0x3f000000, 0, 0, 0}, 0x3f000000, true, true}, + {0x3f000000, 0, 0, 0}, 0x3f000000, true}, {DXGI_FORMAT_R32_FLOAT, 1, IMAGE_SIZE, 0, 3, 2, 0, {}, - {0x3f000000, 0, 0, 0}, 0x3f000000, true, true}, + {0x3f000000, 0, 0, 0}, 0x3f000000, true}, /* Test a single clear rect. */ {DXGI_FORMAT_R32_FLOAT, 1, 1, 0, 0, 1, 1, {{1, 2, IMAGE_SIZE - 4, IMAGE_SIZE - 2}}, {1, 0, 0, 0}, 1, false, true}, {DXGI_FORMAT_R32_FLOAT, 1, 1, 0, 0, 1, 1, {{1, 2, IMAGE_SIZE - 4, IMAGE_SIZE - 2}}, - {0x3f000000, 0, 0, 0}, 0x3f000000, true, true}, + {0x3f000000, 0, 0, 0}, 0x3f000000, true}, /* Test multiple clear rects. */ {DXGI_FORMAT_R32_FLOAT, 1, 1, 0, 0, 1, 2, {{1, 2, 3, 4}, {5, 6, 7, 8}}, {1, 0, 0, 0}, 1, false, true}, {DXGI_FORMAT_R32_FLOAT, 1, 1, 0, 0, 1, 2, {{1, 2, 3, 4}, {5, 6, 7, 8}}, - {0x3f000000, 0, 0, 0}, 0x3f000000, true, true}, + {0x3f000000, 0, 0, 0}, 0x3f000000, true}, /* Test uint clears with formats. */ - {DXGI_FORMAT_R16G16_UINT, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x00020001, false, false}, + {DXGI_FORMAT_R16G16_UINT, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x00020001}, {DXGI_FORMAT_R16G16_UINT, 1, 1, 0, 0, 1, 0, {}, {0x12345, 0, 0, 0}, 0x00002345, false, true}, {DXGI_FORMAT_R16G16_UNORM, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x00020001, false, true}, {DXGI_FORMAT_R16G16_FLOAT, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x00020001, false, true}, - {DXGI_FORMAT_R8G8B8A8_UINT, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x04030201, false, false}, + {DXGI_FORMAT_R8G8B8A8_UINT, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x04030201}, {DXGI_FORMAT_R8G8B8A8_UINT, 1, 1, 0, 0, 1, 0, {}, {0x123, 0, 0, 0}, 0x00000023, false, true}, {DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x04030201, false, true}, {DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {}, {1, 2, 3, 4}, 0x00c01001, false, true}, /* Test float clears with formats. */ {DXGI_FORMAT_R16G16_UNORM, 1, 1, 0, 0, 1, 0, {}, - {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0xffff8000, true, true}, + {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0xffff8000, true}, {DXGI_FORMAT_R16G16_FLOAT, 1, 1, 0, 0, 1, 0, {}, - {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0x3c003800, true, true}, + {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0x3c003800, true}, {DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 0, 0, 1, 0, {}, - {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0x0000ff80, true, true}, + {0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */, 0, 0}, 0x0000ff80, true}, {DXGI_FORMAT_R8G8B8A8_UNORM, 1, 1, 0, 0, 1, 0, {}, - {0, 0, 0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */}, 0xff800000, true, true}, + {0, 0, 0x3f000000 /* 0.5f */, 0x3f800000 /* 1.0f */}, 0xff800000, true}, {DXGI_FORMAT_R11G11B10_FLOAT, 1, 1, 0, 0, 1, 0, {}, {0x3f000000 /* 1.0f */, 0 /* 0.0f */, 0xbf800000 /* -1.0f */, 0x3f000000 /* 1.0f */}, - 0x00000380, true, true}, + 0x00000380, true}, }; static const struct