From 950b3671fefac301c6346f2b1067070eeb46b2ac Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Wed, 28 Jun 2023 16:27:03 +1000 Subject: [PATCH 2/3] Updated vkd3d to 2a3413e0f01524f2068bce12100906eb2200c965. --- include/d3d12.idl | 4 +- libs/vkd3d/Makefile.in | 1 + libs/vkd3d/include/private/vkd3d_common.h | 2 + .../include/private/vkd3d_shader_utils.h | 63 + libs/vkd3d/include/vkd3d.h | 35 + libs/vkd3d/include/vkd3d_shader.h | 116 +- libs/vkd3d/libs/vkd3d-common/debug.c | 17 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 48 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 311 ++++- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 16 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 919 +++++++++++++ libs/vkd3d/libs/vkd3d-shader/hlsl.c | 59 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 19 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1196 ++++++++++------- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 189 ++- .../libs/vkd3d-shader/hlsl_constant_ops.c | 363 +++-- libs/vkd3d/libs/vkd3d-shader/spirv.c | 21 +- libs/vkd3d/libs/vkd3d-shader/tpf.c | 37 +- .../libs/vkd3d-shader/vkd3d_shader_main.c | 164 ++- .../libs/vkd3d-shader/vkd3d_shader_private.h | 20 + libs/vkd3d/libs/vkd3d/command.c | 222 ++- libs/vkd3d/libs/vkd3d/device.c | 2 + libs/vkd3d/libs/vkd3d/resource.c | 51 +- libs/vkd3d/libs/vkd3d/state.c | 10 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 47 +- 25 files changed, 3143 insertions(+), 789 deletions(-) create mode 100644 libs/vkd3d/include/private/vkd3d_shader_utils.h create mode 100644 libs/vkd3d/libs/vkd3d-shader/dxil.c diff --git a/include/d3d12.idl b/include/d3d12.idl index 4fec32d2656..c6064939e1f 100644 --- a/include/d3d12.idl +++ b/include/d3d12.idl @@ -2243,8 +2243,8 @@ interface ID3D12CommandQueue : ID3D12Pageable ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - UINT *heap_range_offsets, - UINT *range_tile_counts, + const UINT *heap_range_offsets, + const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags); void CopyTileMappings(ID3D12Resource *dst_resource, diff --git a/libs/vkd3d/Makefile.in b/libs/vkd3d/Makefile.in index 1ba0e9f71e1..f647af11d07 100644 --- a/libs/vkd3d/Makefile.in +++ b/libs/vkd3d/Makefile.in @@ -17,6 +17,7 @@ SOURCES = \ libs/vkd3d-shader/d3d_asm.c \ libs/vkd3d-shader/d3dbc.c \ libs/vkd3d-shader/dxbc.c \ + libs/vkd3d-shader/dxil.c \ libs/vkd3d-shader/glsl.c \ libs/vkd3d-shader/hlsl.c \ libs/vkd3d-shader/hlsl.l \ diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 1ac23b4a085..da15ee23fd3 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -20,6 +20,7 @@ #define __VKD3D_COMMON_H #include "config.h" +#define WIN32_LEAN_AND_MEAN #include "windows.h" #include "vkd3d_types.h" @@ -28,6 +29,7 @@ #include #include #include +#include #ifdef _MSC_VER #include diff --git a/libs/vkd3d/include/private/vkd3d_shader_utils.h b/libs/vkd3d/include/private/vkd3d_shader_utils.h new file mode 100644 index 00000000000..00052a89988 --- /dev/null +++ b/libs/vkd3d/include/private/vkd3d_shader_utils.h @@ -0,0 +1,63 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#ifndef __VKD3D_SHADER_UTILS_H +#define __VKD3D_SHADER_UTILS_H + +#include "vkd3d_shader.h" + +#define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') +#define TAG_SHDR VKD3D_MAKE_TAG('S', 'H', 'D', 'R') +#define TAG_SHEX VKD3D_MAKE_TAG('S', 'H', 'E', 'X') + +static inline enum vkd3d_result vkd3d_shader_parse_dxbc_source_type(const struct vkd3d_shader_code *dxbc, + enum vkd3d_shader_source_type *type, char **messages) +{ + struct vkd3d_shader_dxbc_desc desc; + enum vkd3d_result ret; + unsigned int i; + + *type = VKD3D_SHADER_SOURCE_NONE; + + if ((ret = vkd3d_shader_parse_dxbc(dxbc, 0, &desc, messages)) < 0) + return ret; + + for (i = 0; i < desc.section_count; ++i) + { + uint32_t tag = desc.sections[i].tag; + if (tag == TAG_SHDR || tag == TAG_SHEX) + { + *type = VKD3D_SHADER_SOURCE_DXBC_TPF; + } + else if (tag == TAG_DXIL) + { + *type = VKD3D_SHADER_SOURCE_DXBC_DXIL; + /* Default to DXIL if both are present. */ + break; + } + } + + vkd3d_shader_free_dxbc(&desc); + + if (*type == VKD3D_SHADER_SOURCE_NONE) + return VKD3D_ERROR_INVALID_SHADER; + + return VKD3D_OK; +} + +#endif /* __VKD3D_SHADER_UTILS_H */ diff --git a/libs/vkd3d/include/vkd3d.h b/libs/vkd3d/include/vkd3d.h index 72ed3ced671..2ccda47248a 100644 --- a/libs/vkd3d/include/vkd3d.h +++ b/libs/vkd3d/include/vkd3d.h @@ -207,7 +207,42 @@ VKD3D_API VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device); VKD3D_API struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device); VKD3D_API uint32_t vkd3d_get_vk_queue_family_index(ID3D12CommandQueue *queue); + +/** + * Acquire the Vulkan queue backing a command queue. + * + * While a queue is acquired by the client, it is locked so that + * neither the vkd3d library nor other threads can submit work to + * it. For that reason it should be released as soon as possible with + * vkd3d_release_vk_queue(). The lock is not reentrant, so the same + * queue must not be acquired more than once by the same thread. + * + * Work submitted through the Direct3D 12 API exposed by vkd3d is not + * always immediately submitted to the Vulkan queue; sometimes it is + * kept in another internal queue, which might not necessarily be + * empty at the time vkd3d_acquire_vk_queue() is called. For this + * reason, work submitted directly to the Vulkan queue might appear to + * the Vulkan driver as being submitted before other work submitted + * though the Direct3D 12 API. If this is not desired, it is + * recommended to synchronize work submission using an ID3D12Fence + * object, by submitting to the queue a signal operation after all the + * Direct3D 12 work is submitted and waiting for it before calling + * vkd3d_acquire_vk_queue(). + * + * \since 1.0 + */ VKD3D_API VkQueue vkd3d_acquire_vk_queue(ID3D12CommandQueue *queue); + +/** + * Release the Vulkan queue backing a command queue. + * + * This must be paired to an earlier corresponding + * vkd3d_acquire_vk_queue(). After this function is called, the Vulkan + * queue returned by vkd3d_acquire_vk_queue() must not be used any + * more. 
+ * + * \since 1.0 + */ VKD3D_API void vkd3d_release_vk_queue(ID3D12CommandQueue *queue); VKD3D_API HRESULT vkd3d_create_image_resource(ID3D12Device *device, diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 274241546ea..6c17a07b9d2 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -85,6 +85,11 @@ enum vkd3d_shader_structure_type * \since 1.3 */ VKD3D_SHADER_STRUCTURE_TYPE_DESCRIPTOR_OFFSET_INFO, + /** + * The structure is a vkd3d_shader_scan_signature_info structure. + * \since 1.9 + */ + VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -611,6 +616,11 @@ enum vkd3d_shader_source_type * model 1, 2, and 3 shaders. \since 1.3 */ VKD3D_SHADER_SOURCE_D3D_BYTECODE, + /** + * A 'DirectX Intermediate Language' shader embedded in a DXBC container. This is + * the format used for Direct3D shader model 6 shaders. \since 1.9 + */ + VKD3D_SHADER_SOURCE_DXBC_DXIL, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SOURCE_TYPE), }; @@ -620,7 +630,7 @@ enum vkd3d_shader_target_type { /** * The shader has no type or is to be ignored. This is not a valid value - * for vkd3d_shader_compile() or vkd3d_shader_scan(). + * for vkd3d_shader_compile(). */ VKD3D_SHADER_TARGET_NONE, /** @@ -1551,6 +1561,64 @@ static inline uint32_t vkd3d_shader_create_swizzle(enum vkd3d_shader_swizzle_com | ((w & VKD3D_SHADER_SWIZZLE_MASK) << VKD3D_SHADER_SWIZZLE_SHIFT(3)); } +/** + * A chained structure containing descriptions of shader inputs and outputs. + * + * This structure is currently implemented only for DXBC and legacy D3D bytecode + * source types. + * For DXBC shaders, the returned information is parsed directly from the + * signatures embedded in the DXBC shader. + * For legacy D3D shaders, the returned information is synthesized based on + * registers declared or used by shader instructions. + * For all other shader types, the structure is zeroed. + * + * All members (except for \ref type and \ref next) are output-only. + * + * This structure is passed to vkd3d_shader_scan() and extends + * vkd3d_shader_compile_info. + * + * Members of this structure are allocated by vkd3d-shader and should be freed + * with vkd3d_shader_free_scan_signature_info() when no longer needed. + * + * All signatures may contain pointers into the input shader, and should only + * be accessed while the input shader remains valid. + * + * Signature elements are synthesized from legacy Direct3D bytecode as follows: + * - The \ref vkd3d_shader_signature_element.semantic_name field is set to an + * uppercase string corresponding to the HLSL name for the usage, e.g. + * "POSITION", "BLENDWEIGHT", "COLOR", "PSIZE", etc. + * - The \ref vkd3d_shader_signature_element.semantic_index field is set to the + * usage index. + * - The \ref vkd3d_shader_signature_element.stream_index is always 0. + * + * Signature elements are synthesized for any input or output register declared + * or used in a legacy Direct3D bytecode shader, including the following: + * - Shader model 1 and 2 colour and texture coordinate registers. + * - The shader model 1 pixel shader output register. + * - Shader model 1 and 2 vertex shader output registers (position, fog, and + * point size). + * - Shader model 3 pixel shader system value input registers (pixel position + * and face). + * + * \since 1.9 + */ +struct vkd3d_shader_scan_signature_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_SCAN_SIGNATURE_INFO. 
*/ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** The shader input varyings. */ + struct vkd3d_shader_signature input; + + /** The shader output varyings. */ + struct vkd3d_shader_signature output; + + /** The shader patch constant varyings. */ + struct vkd3d_shader_signature patch_constant; +}; + #ifdef LIBVKD3D_SHADER_SOURCE # define VKD3D_SHADER_API VKD3D_EXPORT #else @@ -1625,6 +1693,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * following chained structures: * - vkd3d_shader_interface_info * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_signature_info * - vkd3d_shader_spirv_domain_shader_target_info * - vkd3d_shader_spirv_target_info * - vkd3d_shader_transform_feedback_info @@ -1784,6 +1853,26 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * Parse shader source code or byte code, returning various types of requested * information. * + * The \a source_type member of \a compile_info must be set to the type of the + * shader. + * + * The \a target_type member may be set to VKD3D_SHADER_TARGET_NONE, in which + * case vkd3d_shader_scan() will return information about the shader in + * isolation. Alternatively, it may be set to a valid compilation target for the + * shader, in which case vkd3d_shader_scan() will return information that + * reflects the interface for a shader as it will be compiled to that target. + * In this case other chained structures may be appended to \a compile_info as + * they would be passed to vkd3d_shader_compile(), and interpreted accordingly, + * such as vkd3d_shader_spirv_target_info. + * + * (For a hypothetical example, suppose the source shader distinguishes float + * and integer texture data, but the target environment does not support integer + * textures. In this case vkd3d_shader_compile() might translate integer + * operations to float. Accordingly using VKD3D_SHADER_TARGET_NONE would + * accurately report whether the texture expects integer or float data, but + * using the relevant specific target type would report + * VKD3D_SHADER_RESOURCE_DATA_FLOAT.) + * * Currently this function supports the following code types: * - VKD3D_SHADER_SOURCE_DXBC_TPF * @@ -1791,6 +1880,7 @@ VKD3D_SHADER_API int vkd3d_shader_convert_root_signature(struct vkd3d_shader_ver * \n * The DXBC_TPF scanner supports the following chained structures: * - vkd3d_shader_scan_descriptor_info + * - vkd3d_shader_scan_signature_info * \n * Although the \a compile_info parameter is read-only, chained structures * passed to this function need not be, and may serve as output parameters, @@ -1827,12 +1917,18 @@ VKD3D_SHADER_API void vkd3d_shader_free_scan_descriptor_info( struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info); /** - * Read the input signature of a compiled shader, returning a structural + * Read the input signature of a compiled DXBC shader, returning a structural * description which can be easily parsed by C code. * * This function parses a compiled shader. To parse a standalone root signature, * use vkd3d_shader_parse_root_signature(). * + * This function only parses DXBC shaders, and only retrieves the input + * signature. To retrieve signatures from other shader types, or other signature + * types, use vkd3d_shader_scan() and struct vkd3d_shader_scan_signature_info. + * This function returns the same input signature that is returned in + * struct vkd3d_shader_scan_signature_info. 
+ * * \param dxbc Compiled byte code, in DXBC format. * * \param signature Output location in which the parsed root signature will be @@ -2022,6 +2118,19 @@ VKD3D_SHADER_API int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxb VKD3D_SHADER_API int vkd3d_shader_serialize_dxbc(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); +/** + * Free members of struct vkd3d_shader_scan_signature_info allocated by + * vkd3d_shader_scan(). + * + * This function may free members of vkd3d_shader_scan_signature_info, but + * does not free the structure itself. + * + * \param info Scan information to free. + * + * \since 1.9 + */ +VKD3D_SHADER_API void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info); + #endif /* VKD3D_SHADER_NO_PROTOTYPES */ /** Type of vkd3d_shader_get_version(). */ @@ -2087,6 +2196,9 @@ typedef int (*PFN_vkd3d_shader_parse_dxbc)(const struct vkd3d_shader_code *dxbc, typedef int (*PFN_vkd3d_shader_serialize_dxbc)(size_t section_count, const struct vkd3d_shader_dxbc_section_desc *sections, struct vkd3d_shader_code *dxbc, char **messages); +/** Type of vkd3d_shader_free_scan_signature_info(). \since 1.9 */ +typedef void (*PFN_vkd3d_shader_free_scan_signature_info)(struct vkd3d_shader_scan_signature_info *info); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index b363efbd360..aa7df5bd764 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -31,6 +31,7 @@ #include #include #include +#include #ifdef HAVE_PTHREAD_H #include #endif @@ -44,11 +45,11 @@ extern const char *const vkd3d_dbg_env_name; static const char *const debug_level_names[] = { - /* VKD3D_DBG_LEVEL_NONE */ "none", - /* VKD3D_DBG_LEVEL_ERR */ "err", - /* VKD3D_DBG_LEVEL_FIXME */ "fixme", - /* VKD3D_DBG_LEVEL_WARN */ "warn", - /* VKD3D_DBG_LEVEL_TRACE */ "trace", + [VKD3D_DBG_LEVEL_NONE ] = "none", + [VKD3D_DBG_LEVEL_ERR ] = "err", + [VKD3D_DBG_LEVEL_FIXME] = "fixme", + [VKD3D_DBG_LEVEL_WARN ] = "warn", + [VKD3D_DBG_LEVEL_TRACE] = "trace", }; enum vkd3d_dbg_level vkd3d_dbg_get_level(void) @@ -105,7 +106,13 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch assert(level < ARRAY_SIZE(debug_level_names)); +#ifdef _WIN32 + vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); +#elif HAVE_GETTID + vkd3d_dbg_output("vkd3d:%u:%s:%s ", gettid(), debug_level_names[level], function); +#else vkd3d_dbg_output("vkd3d:%s:%s ", debug_level_names[level], function); +#endif va_start(args, fmt); vkd3d_dbg_voutput(fmt, args); va_end(args); diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 0a821b5c878..d72402eb250 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -578,17 +578,17 @@ static void shader_dump_resource_type(struct vkd3d_d3d_asm_compiler *compiler, e { static const char *const resource_type_names[] = { - /* VKD3D_SHADER_RESOURCE_NONE */ "none", - /* VKD3D_SHADER_RESOURCE_BUFFER */ "buffer", - /* VKD3D_SHADER_RESOURCE_TEXTURE_1D */ "texture1d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2D */ "texture2d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMS */ "texture2dms", - /* VKD3D_SHADER_RESOURCE_TEXTURE_3D */ "texture3d", - /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBE */ "texturecube", - /* VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY */ 
"texture1darray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY */ "texture2darray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY */ "texture2dmsarray", - /* VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY */ "texturecubearray", + [VKD3D_SHADER_RESOURCE_NONE ] = "none", + [VKD3D_SHADER_RESOURCE_BUFFER ] = "buffer", + [VKD3D_SHADER_RESOURCE_TEXTURE_1D ] = "texture1d", + [VKD3D_SHADER_RESOURCE_TEXTURE_2D ] = "texture2d", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMS ] = "texture2dms", + [VKD3D_SHADER_RESOURCE_TEXTURE_3D ] = "texture3d", + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBE ] = "texturecube", + [VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY ] = "texture1darray", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY ] = "texture2darray", + [VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY] = "texture2dmsarray", + [VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY] = "texturecubearray", }; if (type < ARRAY_SIZE(resource_type_names)) @@ -601,19 +601,19 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, const { static const char *const data_type_names[] = { - /* VKD3D_DATA_FLOAT */ "float", - /* VKD3D_DATA_INT */ "int", - /* VKD3D_DATA_RESOURCE */ "resource", - /* VKD3D_DATA_SAMPLER */ "sampler", - /* VKD3D_DATA_UAV */ "uav", - /* VKD3D_DATA_UINT */ "uint", - /* VKD3D_DATA_UNORM */ "unorm", - /* VKD3D_DATA_SNORM */ "snorm", - /* VKD3D_DATA_OPAQUE */ "opaque", - /* VKD3D_DATA_MIXED */ "mixed", - /* VKD3D_DATA_DOUBLE */ "double", - /* VKD3D_DATA_CONTINUED */ "", - /* VKD3D_DATA_UNUSED */ "", + [VKD3D_DATA_FLOAT ] = "float", + [VKD3D_DATA_INT ] = "int", + [VKD3D_DATA_RESOURCE ] = "resource", + [VKD3D_DATA_SAMPLER ] = "sampler", + [VKD3D_DATA_UAV ] = "uav", + [VKD3D_DATA_UINT ] = "uint", + [VKD3D_DATA_UNORM ] = "unorm", + [VKD3D_DATA_SNORM ] = "snorm", + [VKD3D_DATA_OPAQUE ] = "opaque", + [VKD3D_DATA_MIXED ] = "mixed", + [VKD3D_DATA_DOUBLE ] = "double", + [VKD3D_DATA_CONTINUED] = "", + [VKD3D_DATA_UNUSED ] = "", }; const char *name; int i; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 712613ac13b..369112ce18d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -260,9 +260,9 @@ static const struct vkd3d_sm1_opcode_info vs_opcode_table[] = /* Declarations */ {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, /* Control flow */ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}}, {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}}, @@ -327,9 +327,9 @@ static const struct vkd3d_sm1_opcode_info ps_opcode_table[] = /* Declarations */ {VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL}, /* Constant definitions */ - {VKD3D_SM1_OP_DEF, 1, 4, VKD3DSIH_DEF}, + {VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF}, {VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB}, - {VKD3D_SM1_OP_DEFI, 1, 4, VKD3DSIH_DEFI}, + {VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI}, /* Control flow */ {VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}}, {VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}}, @@ -490,6 +490,255 @@ static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT; } +static struct signature_element *find_signature_element(const struct shader_signature *signature, + const char *semantic_name, unsigned int semantic_index) +{ + struct 
signature_element *e = signature->elements; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + if (!ascii_strcasecmp(e[i].semantic_name, semantic_name) + && e[i].semantic_index == semantic_index) + return &e[i]; + } + + return NULL; +} + +static struct signature_element *find_signature_element_by_register_index( + const struct shader_signature *signature, unsigned int register_index) +{ + struct signature_element *e = signature->elements; + unsigned int i; + + for (i = 0; i < signature->element_count; ++i) + { + if (e[i].register_index == register_index) + return &e[i]; + } + + return NULL; +} + +static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output, + const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval, + unsigned int register_index, bool is_dcl, unsigned int mask) +{ + struct shader_signature *signature; + struct signature_element *element; + + if (output) + signature = &sm1->p.shader_desc.output_signature; + else + signature = &sm1->p.shader_desc.input_signature; + + if ((element = find_signature_element(signature, name, index))) + { + element->mask |= mask; + if (!is_dcl) + element->used_mask |= mask; + return true; + } + + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, + signature->element_count + 1, sizeof(*signature->elements))) + return false; + element = &signature->elements[signature->element_count++]; + + element->semantic_name = name; + element->semantic_index = index; + element->stream_index = 0; + element->sysval_semantic = sysval; + element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + element->register_index = register_index; + element->register_count = 1; + element->mask = mask; + element->used_mask = is_dcl ? 0 : mask; + element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE; + + return true; +} + +static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, + unsigned int register_index, unsigned int mask) +{ + struct shader_signature *signature; + struct signature_element *element; + + if (output) + signature = &sm1->p.shader_desc.output_signature; + else + signature = &sm1->p.shader_desc.input_signature; + + if (!(element = find_signature_element_by_register_index(signature, register_index))) + { + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC, + "%s register %u was used without being declared.", output ? "Output" : "Input", register_index); + return; + } + + element->used_mask |= mask; +} + +static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask) +{ + unsigned int register_index = reg->idx[0].offset; + + switch (reg->type) + { + case VKD3DSPR_TEMP: + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL + && sm1->p.shader_version.major == 1 && !register_index) + return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask); + return true; + + case VKD3DSPR_INPUT: + /* For vertex shaders or sm3 pixel shaders, we should have already + * had a DCL instruction. Otherwise, this is a colour input. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3) + { + add_signature_mask(sm1, false, register_index, mask); + return true; + } + return add_signature_element(sm1, false, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + + case VKD3DSPR_TEXTURE: + /* For vertex shaders, this is ADDR. 
*/ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + return true; + return add_signature_element(sm1, false, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + + case VKD3DSPR_OUTPUT: + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX) + { + /* For sm < 2 vertex shaders, this is TEXCRDOUT. + * + * For sm3 vertex shaders, this is OUTPUT, but we already + * should have had a DCL instruction. */ + if (sm1->p.shader_version.major == 3) + { + add_signature_mask(sm1, true, register_index, mask); + return true; + } + return add_signature_element(sm1, true, "TEXCOORD", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + } + /* fall through */ + + case VKD3DSPR_ATTROUT: + case VKD3DSPR_COLOROUT: + return add_signature_element(sm1, true, "COLOR", register_index, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask); + + case VKD3DSPR_DEPTHOUT: + return add_signature_element(sm1, true, "DEPTH", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + case VKD3DSPR_RASTOUT: + switch (register_index) + { + case 0: + return add_signature_element(sm1, true, "POSITION", 0, + VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); + + case 1: + return add_signature_element(sm1, true, "FOG", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + case 2: + return add_signature_element(sm1, true, "PSIZE", 0, + VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1); + + default: + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, + "Invalid rasterizer output index %u.", register_index); + return true; + } + + case VKD3DSPR_MISCTYPE: + switch (register_index) + { + case 0: + return add_signature_element(sm1, false, "VPOS", 0, + VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask); + + case 1: + return add_signature_element(sm1, false, "VFACE", 0, + VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1); + + default: + vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX, + "Invalid miscellaneous fragment input index %u.", register_index); + return true; + } + + default: + return true; + } +} + +static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1, + const struct vkd3d_shader_semantic *semantic) +{ + const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg; + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + unsigned int mask = semantic->resource.reg.write_mask; + bool output; + + static const char sm1_semantic_names[][13] = + { + [VKD3D_DECL_USAGE_POSITION ] = "POSITION", + [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT", + [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES", + [VKD3D_DECL_USAGE_NORMAL ] = "NORMAL", + [VKD3D_DECL_USAGE_PSIZE ] = "PSIZE", + [VKD3D_DECL_USAGE_TEXCOORD ] = "TEXCOORD", + [VKD3D_DECL_USAGE_TANGENT ] = "TANGENT", + [VKD3D_DECL_USAGE_BINORMAL ] = "BINORMAL", + [VKD3D_DECL_USAGE_TESS_FACTOR ] = "TESSFACTOR", + [VKD3D_DECL_USAGE_POSITIONT ] = "POSITIONT", + [VKD3D_DECL_USAGE_COLOR ] = "COLOR", + [VKD3D_DECL_USAGE_FOG ] = "FOG", + [VKD3D_DECL_USAGE_DEPTH ] = "DEPTH", + [VKD3D_DECL_USAGE_SAMPLE ] = "SAMPLE", + }; + + if (reg->type == VKD3DSPR_OUTPUT) + output = true; + else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE) + output = false; + else /* vpos and vface don't have a semantic. */ + return add_signature_element_from_register(sm1, reg, true, mask); + + /* sm2 pixel shaders use DCL but don't provide a semantic. 
*/ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2) + return add_signature_element_from_register(sm1, reg, true, mask); + + /* With the exception of vertex POSITION output, none of these are system + * values. Pixel POSITION input is not equivalent to SV_Position; the closer + * equivalent is VPOS, which is not declared as a semantic. */ + if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX + && output && semantic->usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + + return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage], + semantic->usage_idx, sysval, reg->idx[0].offset, true, mask); +} + +static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_register *reg, unsigned int mask) +{ + uint32_t register_index = reg->idx[0].offset; + + if (reg->type == VKD3DSPR_TEMP) + sm1->p.shader_desc.temp_count = max(sm1->p.shader_desc.temp_count, register_index + 1); + + add_signature_element_from_register(sm1, reg, false, mask); +} + /* Read a parameter token from the input stream, and possibly a relative * addressing token. */ static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1, @@ -640,6 +889,8 @@ static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1, range = &semantic->resource.range; range->space = 0; range->first = range->last = semantic->resource.reg.reg.idx[0].offset; + + add_signature_element_from_semantic(sm1, semantic); } static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr, @@ -744,6 +995,14 @@ static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, } } +static unsigned int mask_from_swizzle(unsigned int swizzle) +{ + return (1u << vkd3d_swizzle_get_component(swizzle, 0)) + | (1u << vkd3d_swizzle_get_component(swizzle, 1)) + | (1u << vkd3d_swizzle_get_component(swizzle, 2)) + | (1u << vkd3d_swizzle_get_component(swizzle, 3)); +} + static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { struct vkd3d_shader_src_param *src_params, *predicate; @@ -832,7 +1091,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str { /* Destination token */ if (ins->dst_count) + { shader_sm1_read_dst_param(sm1, &p, dst_param); + shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask); + } /* Predication token */ if (ins->predicate) @@ -840,7 +1102,10 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str /* Other source tokens */ for (i = 0; i < ins->src_count; ++i) + { shader_sm1_read_src_param(sm1, &p, &src_params[i]); + shader_sm1_scan_register(sm1, &src_params[i].reg, mask_from_swizzle(src_params[i].swizzle)); + } } if (sm1->abort) @@ -1553,12 +1818,13 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ for (i = 0; i < ctx->constant_defs.count; ++i) { + const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i]; uint32_t token = D3DSIO_DEF; const struct sm1_dst_register reg = { .type = D3DSPR_CONST, .writemask = VKD3DSP_WRITEMASK_ALL, - .reg = i, + .reg = constant_reg->index, }; if (ctx->profile->major_version > 1) @@ -1567,7 +1833,7 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ write_sm1_dst_register(buffer, ®); for (x = 0; x < 4; ++x) - put_f32(buffer, ctx->constant_defs.values[i].f[x]); + put_f32(buffer, constant_reg->value.f[x]); } } @@ 
-1844,6 +2110,35 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } } +static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +{ + const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + switch (jump->type) + { + case HLSL_IR_JUMP_DISCARD_NEG: + { + struct hlsl_reg *reg = &jump->condition.node->reg; + + struct sm1_instruction instr = + { + .opcode = VKD3D_SM1_OP_TEXKILL, + + .dst.type = D3DSPR_TEMP, + .dst.reg = reg->id, + .dst.writemask = reg->writemask, + .has_dst = 1, + }; + + write_sm1_instruction(ctx, buffer, &instr); + break; + } + + default: + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + } +} + static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); @@ -2038,6 +2333,10 @@ static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b write_sm1_expr(ctx, buffer, instr); break; + case HLSL_IR_JUMP: + write_sm1_jump(ctx, buffer, instr); + break; + case HLSL_IR_LOAD: write_sm1_load(ctx, buffer, instr); break; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 3e3f06faeb5..716b7bdb721 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -493,8 +493,14 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, return ret; break; + case TAG_DXIL: case TAG_SHDR: case TAG_SHEX: + if ((section->tag == TAG_DXIL) != desc->is_dxil) + { + TRACE("Skipping chunk %#x.\n", section->tag); + break; + } if (desc->byte_code) FIXME("Multiple shader code chunks.\n"); desc->byte_code = section->data.code; @@ -505,10 +511,6 @@ static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, TRACE("Skipping AON9 shader code chunk.\n"); break; - case TAG_DXIL: - FIXME("Skipping DXIL shader model 6+ code chunk.\n"); - break; - default: TRACE("Skipping chunk %#x.\n", section->tag); break; @@ -529,12 +531,6 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, { int ret; - desc->byte_code = NULL; - desc->byte_code_size = 0; - memset(&desc->input_signature, 0, sizeof(desc->input_signature)); - memset(&desc->output_signature, 0, sizeof(desc->output_signature)); - memset(&desc->patch_constant_signature, 0, sizeof(desc->patch_constant_signature)); - ret = for_each_dxbc_section(dxbc, message_context, source_name, shdr_handler, desc); if (!desc->byte_code) ret = VKD3D_ERROR_INVALID_ARGUMENT; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c new file mode 100644 index 00000000000..67dcd26a0e0 --- /dev/null +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -0,0 +1,919 @@ +/* + * Copyright 2023 Conor McCarthy for CodeWeavers + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA + */ + +#include "vkd3d_shader_private.h" + +#define VKD3D_SM6_VERSION_MAJOR(version) (((version) >> 4) & 0xf) +#define VKD3D_SM6_VERSION_MINOR(version) (((version) >> 0) & 0xf) + +#define BITCODE_MAGIC VKD3D_MAKE_TAG('B', 'C', 0xc0, 0xde) + +enum bitcode_block_id +{ + BLOCKINFO_BLOCK = 0, + MODULE_BLOCK = 8, + PARAMATTR_BLOCK = 9, + PARAMATTR_GROUP_BLOCK = 10, + CONSTANTS_BLOCK = 11, + FUNCTION_BLOCK = 12, + VALUE_SYMTAB_BLOCK = 14, + METADATA_BLOCK = 15, + METADATA_ATTACHMENT_BLOCK = 16, + TYPE_BLOCK = 17, + USELIST_BLOCK = 18, +}; + +enum bitcode_blockinfo_code +{ + SETBID = 1, + BLOCKNAME = 2, + SETRECORDNAME = 3, +}; + +enum bitcode_block_abbreviation +{ + END_BLOCK = 0, + ENTER_SUBBLOCK = 1, + DEFINE_ABBREV = 2, + UNABBREV_RECORD = 3, +}; + +enum bitcode_abbrev_type +{ + ABBREV_FIXED = 1, + ABBREV_VBR = 2, + ABBREV_ARRAY = 3, + ABBREV_CHAR = 4, + ABBREV_BLOB = 5, +}; + +struct dxil_record +{ + unsigned int code; + unsigned int operand_count; + uint64_t operands[]; +}; + +struct dxil_block +{ + const struct dxil_block *parent; + enum bitcode_block_id id; + unsigned int abbrev_len; + unsigned int start; + unsigned int length; + unsigned int level; + + /* The abbrev, block and record structs are not relocatable. */ + struct dxil_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + unsigned int blockinfo_bid; + bool has_bid; + + struct dxil_block **child_blocks; + size_t child_block_capacity; + size_t child_block_count; + + struct dxil_record **records; + size_t record_capacity; + size_t record_count; +}; + +struct sm6_parser +{ + const uint32_t *ptr, *start, *end; + unsigned int bitpos; + + struct dxil_block root_block; + struct dxil_block *current_block; + + struct dxil_global_abbrev **abbrevs; + size_t abbrev_capacity; + size_t abbrev_count; + + struct vkd3d_shader_parser p; +}; + +struct dxil_abbrev_operand +{ + uint64_t context; + bool (*read_operand)(struct sm6_parser *sm6, uint64_t context, uint64_t *operand); +}; + +struct dxil_abbrev +{ + unsigned int count; + bool is_array; + struct dxil_abbrev_operand operands[]; +}; + +struct dxil_global_abbrev +{ + unsigned int block_id; + struct dxil_abbrev abbrev; +}; + +static struct sm6_parser *sm6_parser(struct vkd3d_shader_parser *parser) +{ + return CONTAINING_RECORD(parser, struct sm6_parser, p); +} + +static bool sm6_parser_is_end(struct sm6_parser *sm6) +{ + return sm6->ptr == sm6->end; +} + +static uint32_t sm6_parser_read_uint32(struct sm6_parser *sm6) +{ + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + return *sm6->ptr++; +} + +static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length) +{ + unsigned int l, prev_len = 0; + uint32_t bits; + + if (!length) + return 0; + + assert(length < 32); + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + + assert(sm6->bitpos < 32); + bits = *sm6->ptr >> sm6->bitpos; + l = 32 - sm6->bitpos; + if (l <= length) + { + ++sm6->ptr; + if (sm6_parser_is_end(sm6) && l < length) + { + sm6->p.failed = true; + return bits; + } + sm6->bitpos = 0; + bits |= *sm6->ptr << l; + prev_len = l; + } + sm6->bitpos += length - prev_len; + + return bits & ((1 << length) - 1); +} + +static uint64_t sm6_parser_read_vbr(struct sm6_parser *sm6, unsigned int length) +{ + unsigned int bits, flag, mask, shift = 0; 
+ uint64_t result = 0; + + if (!length) + return 0; + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return 0; + } + + flag = 1 << (length - 1); + mask = flag - 1; + do + { + bits = sm6_parser_read_bits(sm6, length); + result |= (uint64_t)(bits & mask) << shift; + shift += length - 1; + } while ((bits & flag) && !sm6->p.failed && shift < 64); + + sm6->p.failed |= !!(bits & flag); + + return result; +} + +static void sm6_parser_align_32(struct sm6_parser *sm6) +{ + if (!sm6->bitpos) + return; + + if (sm6_parser_is_end(sm6)) + { + sm6->p.failed = true; + return; + } + + ++sm6->ptr; + sm6->bitpos = 0; +} + +static bool dxil_block_handle_blockinfo_record(struct dxil_block *block, struct dxil_record *record) +{ + /* BLOCKINFO blocks must only occur immediately below the module root block. */ + if (block->level > 1) + { + WARN("Invalid blockinfo block level %u.\n", block->level); + return false; + } + + switch (record->code) + { + case SETBID: + if (!record->operand_count) + { + WARN("Missing id operand.\n"); + return false; + } + if (record->operands[0] > UINT_MAX) + WARN("Truncating block id %"PRIu64".\n", record->operands[0]); + block->blockinfo_bid = record->operands[0]; + block->has_bid = true; + break; + case BLOCKNAME: + case SETRECORDNAME: + break; + default: + FIXME("Unhandled BLOCKINFO record type %u.\n", record->code); + break; + } + + return true; +} + +static enum vkd3d_result dxil_block_add_record(struct dxil_block *block, struct dxil_record *record) +{ + unsigned int reserve; + + switch (block->id) + { + /* Rough initial reserve sizes for small shaders. */ + case CONSTANTS_BLOCK: reserve = 32; break; + case FUNCTION_BLOCK: reserve = 128; break; + case METADATA_BLOCK: reserve = 32; break; + case TYPE_BLOCK: reserve = 32; break; + default: reserve = 8; break; + } + reserve = max(reserve, block->record_count + 1); + if (!vkd3d_array_reserve((void **)&block->records, &block->record_capacity, reserve, sizeof(*block->records))) + { + ERR("Failed to allocate %u records.\n", reserve); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (block->id == BLOCKINFO_BLOCK && !dxil_block_handle_blockinfo_record(block, record)) + return VKD3D_ERROR_INVALID_SHADER; + + block->records[block->record_count++] = record; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_read_unabbrev_record(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + enum vkd3d_result ret = VKD3D_OK; + unsigned int code, count, i; + struct dxil_record *record; + + code = sm6_parser_read_vbr(sm6, 6); + + count = sm6_parser_read_vbr(sm6, 6); + if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + record->code = code; + record->operand_count = count; + + for (i = 0; i < count; ++i) + record->operands[i] = sm6_parser_read_vbr(sm6, 6); + if (sm6->p.failed) + ret = VKD3D_ERROR_INVALID_SHADER; + + if (ret < 0 || (ret = dxil_block_add_record(block, record)) < 0) + vkd3d_free(record); + + return ret; +} + +static bool sm6_parser_read_literal_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = context; + return !sm6->p.failed; +} + +static bool sm6_parser_read_fixed_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = sm6_parser_read_bits(sm6, context); + return !sm6->p.failed; +} + +static bool sm6_parser_read_vbr_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = 
sm6_parser_read_vbr(sm6, context); + return !sm6->p.failed; +} + +static bool sm6_parser_read_char6_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + *op = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789._"[sm6_parser_read_bits(sm6, 6)]; + return !sm6->p.failed; +} + +static bool sm6_parser_read_blob_operand(struct sm6_parser *sm6, uint64_t context, uint64_t *op) +{ + int count = sm6_parser_read_vbr(sm6, 6); + sm6_parser_align_32(sm6); + for (; count > 0; count -= 4) + sm6_parser_read_uint32(sm6); + FIXME("Unhandled blob operand.\n"); + return false; +} + +static enum vkd3d_result dxil_abbrev_init(struct dxil_abbrev *abbrev, unsigned int count, struct sm6_parser *sm6) +{ + enum bitcode_abbrev_type prev_type, type; + unsigned int i; + + abbrev->is_array = false; + + for (i = 0, prev_type = 0; i < count && !sm6->p.failed; ++i) + { + if (sm6_parser_read_bits(sm6, 1)) + { + if (prev_type == ABBREV_ARRAY) + { + WARN("Unexpected literal abbreviation after array.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 8); + abbrev->operands[i].read_operand = sm6_parser_read_literal_operand; + continue; + } + + switch (type = sm6_parser_read_bits(sm6, 3)) + { + case ABBREV_FIXED: + case ABBREV_VBR: + abbrev->operands[i].context = sm6_parser_read_vbr(sm6, 5); + abbrev->operands[i].read_operand = (type == ABBREV_FIXED) ? sm6_parser_read_fixed_operand + : sm6_parser_read_vbr_operand; + break; + + case ABBREV_ARRAY: + if (prev_type == ABBREV_ARRAY || i != count - 2) + { + WARN("Unexpected array abbreviation.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + abbrev->is_array = true; + --i; + --count; + break; + + case ABBREV_CHAR: + abbrev->operands[i].read_operand = sm6_parser_read_char6_operand; + break; + + case ABBREV_BLOB: + if (prev_type == ABBREV_ARRAY || i != count - 1) + { + WARN("Unexpected blob abbreviation.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + abbrev->operands[i].read_operand = sm6_parser_read_blob_operand; + break; + } + + prev_type = type; + } + + abbrev->count = count; + + return sm6->p.failed ? 
VKD3D_ERROR_INVALID_SHADER : VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + unsigned int count = sm6_parser_read_vbr(sm6, 5); + struct dxil_global_abbrev *global_abbrev; + enum vkd3d_result ret; + + assert(block->id == BLOCKINFO_BLOCK); + + if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) + || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) + { + ERR("Failed to allocate global abbreviation.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_abbrev_init(&global_abbrev->abbrev, count, sm6)) < 0) + { + vkd3d_free(global_abbrev); + return ret; + } + + if (!block->has_bid) + { + WARN("Missing blockinfo block id.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + if (block->blockinfo_bid == MODULE_BLOCK) + { + FIXME("Unhandled global abbreviation for module block.\n"); + return VKD3D_ERROR_INVALID_SHADER; + } + global_abbrev->block_id = block->blockinfo_bid; + + sm6->abbrevs[sm6->abbrev_count++] = global_abbrev; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_add_block_abbrev(struct sm6_parser *sm6) +{ + struct dxil_block *block = sm6->current_block; + struct dxil_abbrev *abbrev; + enum vkd3d_result ret; + unsigned int count; + + if (block->id == BLOCKINFO_BLOCK) + return sm6_parser_add_global_abbrev(sm6); + + count = sm6_parser_read_vbr(sm6, 5); + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, block->abbrev_count + 1, sizeof(*block->abbrevs)) + || !(abbrev = vkd3d_malloc(sizeof(*abbrev) + count * sizeof(abbrev->operands[0])))) + { + ERR("Failed to allocate block abbreviation.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_abbrev_init(abbrev, count, sm6)) < 0) + { + vkd3d_free(abbrev); + return ret; + } + + block->abbrevs[block->abbrev_count++] = abbrev; + + return VKD3D_OK; +} + +static enum vkd3d_result sm6_parser_read_abbrev_record(struct sm6_parser *sm6, unsigned int abbrev_id) +{ + enum vkd3d_result ret = VKD3D_ERROR_INVALID_SHADER; + struct dxil_block *block = sm6->current_block; + struct dxil_record *temp, *record; + unsigned int i, count, array_len; + struct dxil_abbrev *abbrev; + uint64_t code; + + if (abbrev_id >= block->abbrev_count) + { + WARN("Invalid abbreviation id %u.\n", abbrev_id); + return VKD3D_ERROR_INVALID_SHADER; + } + + abbrev = block->abbrevs[abbrev_id]; + if (!(count = abbrev->count)) + return VKD3D_OK; + if (count == 1 && abbrev->is_array) + return VKD3D_ERROR_INVALID_SHADER; + + /* First operand is the record code. The array is included in the count, but will be done separately. */ + count -= abbrev->is_array + 1; + if (!(record = vkd3d_malloc(sizeof(*record) + count * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if (!abbrev->operands[0].read_operand(sm6, abbrev->operands[0].context, &code)) + goto fail; + if (code > UINT_MAX) + FIXME("Truncating 64-bit record code %#"PRIx64".\n", code); + record->code = code; + + for (i = 0; i < count; ++i) + if (!abbrev->operands[i + 1].read_operand(sm6, abbrev->operands[i + 1].context, &record->operands[i])) + goto fail; + record->operand_count = count; + + /* An array can occur only as the last operand. 
*/ + if (abbrev->is_array) + { + array_len = sm6_parser_read_vbr(sm6, 6); + if (!(temp = vkd3d_realloc(record, sizeof(*record) + (count + array_len) * sizeof(record->operands[0])))) + { + ERR("Failed to allocate record with %u operands.\n", count + array_len); + ret = VKD3D_ERROR_OUT_OF_MEMORY; + goto fail; + } + record = temp; + + for (i = 0; i < array_len; ++i) + { + if (!abbrev->operands[count + 1].read_operand(sm6, abbrev->operands[count + 1].context, + &record->operands[count + i])) + { + goto fail; + } + } + record->operand_count += array_len; + } + + if ((ret = dxil_block_add_record(block, record)) < 0) + goto fail; + + return VKD3D_OK; + +fail: + vkd3d_free(record); + return ret; +} + +static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, + struct sm6_parser *sm6); + +static enum vkd3d_result dxil_block_read(struct dxil_block *parent, struct sm6_parser *sm6) +{ + unsigned int reserve = (parent->id == MODULE_BLOCK) ? 12 : 2; + struct dxil_block *block; + enum vkd3d_result ret; + + sm6->current_block = parent; + + do + { + unsigned int abbrev_id = sm6_parser_read_bits(sm6, parent->abbrev_len); + + switch (abbrev_id) + { + case END_BLOCK: + sm6_parser_align_32(sm6); + return VKD3D_OK; + + case ENTER_SUBBLOCK: + if (parent->id != MODULE_BLOCK && parent->id != FUNCTION_BLOCK) + { + WARN("Invalid subblock parent id %u.\n", parent->id); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (!vkd3d_array_reserve((void **)&parent->child_blocks, &parent->child_block_capacity, + max(reserve, parent->child_block_count + 1), sizeof(*parent->child_blocks)) + || !(block = vkd3d_calloc(1, sizeof(*block)))) + { + ERR("Failed to allocate block.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + if ((ret = dxil_block_init(block, parent, sm6)) < 0) + { + vkd3d_free(block); + return ret; + } + + parent->child_blocks[parent->child_block_count++] = block; + sm6->current_block = parent; + break; + + case DEFINE_ABBREV: + if ((ret = sm6_parser_add_block_abbrev(sm6)) < 0) + return ret; + break; + + case UNABBREV_RECORD: + if ((ret = sm6_parser_read_unabbrev_record(sm6)) < 0) + { + WARN("Failed to read unabbreviated record.\n"); + return ret; + } + break; + + default: + if ((ret = sm6_parser_read_abbrev_record(sm6, abbrev_id - 4)) < 0) + { + WARN("Failed to read abbreviated record.\n"); + return ret; + } + break; + } + } while (!sm6->p.failed); + + return VKD3D_ERROR_INVALID_SHADER; +} + +static unsigned int sm6_parser_compute_global_abbrev_count_for_block_id(struct sm6_parser *sm6, + unsigned int block_id) +{ + unsigned int i, count; + + for (i = 0, count = 0; i < sm6->abbrev_count; ++i) + count += sm6->abbrevs[i]->block_id == block_id; + + return count; +} + +static void dxil_block_destroy(struct dxil_block *block) +{ + unsigned int i; + + for (i = 0; i < block->record_count; ++i) + vkd3d_free(block->records[i]); + vkd3d_free(block->records); + + for (i = 0; i < block->child_block_count; ++i) + { + dxil_block_destroy(block->child_blocks[i]); + vkd3d_free(block->child_blocks[i]); + } + vkd3d_free(block->child_blocks); + + block->records = NULL; + block->record_count = 0; + block->child_blocks = NULL; + block->child_block_count = 0; +} + +static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct dxil_block *parent, + struct sm6_parser *sm6) +{ + unsigned int i, abbrev_count = 0; + enum vkd3d_result ret; + + block->parent = parent; + block->level = parent ? 
parent->level + 1 : 0; + block->id = sm6_parser_read_vbr(sm6, 8); + block->abbrev_len = sm6_parser_read_vbr(sm6, 4); + sm6_parser_align_32(sm6); + block->length = sm6_parser_read_uint32(sm6); + block->start = sm6->ptr - sm6->start; + + if (sm6->p.failed) + return VKD3D_ERROR_INVALID_SHADER; + + if ((block->abbrev_count = sm6_parser_compute_global_abbrev_count_for_block_id(sm6, block->id))) + { + if (!vkd3d_array_reserve((void **)&block->abbrevs, &block->abbrev_capacity, + block->abbrev_count, sizeof(*block->abbrevs))) + { + ERR("Failed to allocate block abbreviations.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + for (i = 0; i < sm6->abbrev_count; ++i) + if (sm6->abbrevs[i]->block_id == block->id) + block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; + + assert(abbrev_count == block->abbrev_count); + } + + if ((ret = dxil_block_read(block, sm6)) < 0) + dxil_block_destroy(block); + + for (i = abbrev_count; i < block->abbrev_count; ++i) + vkd3d_free(block->abbrevs[i]); + vkd3d_free(block->abbrevs); + block->abbrevs = NULL; + block->abbrev_count = 0; + + return ret; +} + +static void dxil_global_abbrevs_cleanup(struct dxil_global_abbrev **abbrevs, unsigned int count) +{ + unsigned int i; + + for (i = 0; i < count; ++i) + vkd3d_free(abbrevs[i]); + vkd3d_free(abbrevs); +} + +static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) +{ + struct sm6_parser *sm6 = sm6_parser(parser); + + dxil_block_destroy(&sm6->root_block); + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); + shader_instruction_array_destroy(&parser->instructions); + free_shader_desc(&parser->shader_desc); + vkd3d_free(sm6); +} + +static const struct vkd3d_shader_parser_ops sm6_parser_ops = +{ + .parser_destroy = sm6_parser_destroy, +}; + +static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, + const char *source_name, struct vkd3d_shader_message_context *message_context) +{ + const struct vkd3d_shader_location location = {.source_name = source_name}; + uint32_t version_token, dxil_version, token_count, magic; + unsigned int count, length, chunk_offset, chunk_size; + enum bitcode_block_abbreviation abbr; + struct vkd3d_shader_version version; + struct dxil_block *block; + enum vkd3d_result ret; + + count = byte_code_size / sizeof(*byte_code); + if (count < 6) + { + WARN("Invalid data size %zu.\n", byte_code_size); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE, + "DXIL chunk size %zu is smaller than the DXIL header size.", byte_code_size); + return VKD3D_ERROR_INVALID_SHADER; + } + + version_token = byte_code[0]; + TRACE("Compiler version: 0x%08x.\n", version_token); + token_count = byte_code[1]; + TRACE("Token count: %u.\n", token_count); + + if (token_count < 6 || count < token_count) + { + WARN("Invalid token count %u (word count %u).\n", token_count, count); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, + "DXIL chunk token count %#x is invalid (word count %u).", token_count, count); + return VKD3D_ERROR_INVALID_SHADER; + } + + if (byte_code[2] != TAG_DXIL) + WARN("Unknown magic number 0x%08x.\n", byte_code[2]); + + dxil_version = byte_code[3]; + if (dxil_version > 0x102) + WARN("Unknown DXIL version: 0x%08x.\n", dxil_version); + else + TRACE("DXIL version: 0x%08x.\n", dxil_version); + + chunk_offset = byte_code[4]; + if (chunk_offset < 16 || chunk_offset >= byte_code_size) + { + WARN("Invalid bitcode chunk offset %#x (data size %zu).\n", 
chunk_offset, byte_code_size); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET, + "DXIL bitcode chunk has invalid offset %#x (data size %#zx).", chunk_offset, byte_code_size); + return VKD3D_ERROR_INVALID_SHADER; + } + chunk_size = byte_code[5]; + if (chunk_size > byte_code_size - chunk_offset) + { + WARN("Invalid bitcode chunk size %#x (data size %zu, chunk offset %#x).\n", + chunk_size, byte_code_size, chunk_offset); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE, + "DXIL bitcode chunk has invalid size %#x (data size %#zx, chunk offset %#x).", + chunk_size, byte_code_size, chunk_offset); + return VKD3D_ERROR_INVALID_SHADER; + } + + sm6->start = (const uint32_t *)((const char*)&byte_code[2] + chunk_offset); + if ((magic = sm6->start[0]) != BITCODE_MAGIC) + { + WARN("Unknown magic number 0x%08x.\n", magic); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER, + "DXIL bitcode chunk magic number 0x%08x is not the expected 0x%08x.", magic, BITCODE_MAGIC); + } + + sm6->end = &sm6->start[(chunk_size + sizeof(*sm6->start) - 1) / sizeof(*sm6->start)]; + + if ((version.type = version_token >> 16) >= VKD3D_SHADER_TYPE_COUNT) + { + FIXME("Unknown shader type %#x.\n", version.type); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE, + "Unknown shader type %#x.", version.type); + } + + version.major = VKD3D_SM6_VERSION_MAJOR(version_token); + version.minor = VKD3D_SM6_VERSION_MINOR(version_token); + + if ((abbr = sm6->start[1] & 3) != ENTER_SUBBLOCK) + { + WARN("Initial block abbreviation %u is not ENTER_SUBBLOCK.\n", abbr); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, + "DXIL bitcode chunk has invalid initial block abbreviation %u.", abbr); + return VKD3D_ERROR_INVALID_SHADER; + } + + /* Estimate instruction count to avoid reallocation in most shaders. 
*/ + count = max(token_count, 400) - 400; + vkd3d_shader_parser_init(&sm6->p, message_context, source_name, &version, &sm6_parser_ops, + (count + (count >> 2)) / 2u + 10); + sm6->ptr = &sm6->start[1]; + sm6->bitpos = 2; + + block = &sm6->root_block; + if ((ret = dxil_block_init(block, NULL, sm6)) < 0) + { + if (ret == VKD3D_ERROR_OUT_OF_MEMORY) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, + "Out of memory parsing DXIL bitcode chunk."); + else if (ret == VKD3D_ERROR_INVALID_SHADER) + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE, + "DXIL bitcode chunk has invalid bitcode."); + else + vkd3d_unreachable(); + return ret; + } + + dxil_global_abbrevs_cleanup(sm6->abbrevs, sm6->abbrev_count); + sm6->abbrevs = NULL; + sm6->abbrev_count = 0; + + length = sm6->ptr - sm6->start - block->start; + if (length != block->length) + { + WARN("Invalid block length %u; expected %u.\n", length, block->length); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH, + "Root block ends with length %u but indicated length is %u.", length, block->length); + } + if (sm6->ptr != sm6->end) + { + unsigned int expected_length = sm6->end - sm6->start; + length = sm6->ptr - sm6->start; + WARN("Invalid module length %u; expected %u.\n", length, expected_length); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH, + "Module ends with length %u but indicated length is %u.", length, expected_length); + } + + dxil_block_destroy(&sm6->root_block); + + return VKD3D_OK; +} + +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) +{ + struct vkd3d_shader_desc *shader_desc; + uint32_t *byte_code = NULL; + struct sm6_parser *sm6; + int ret; + + if (!(sm6 = vkd3d_calloc(1, sizeof(*sm6)))) + { + ERR("Failed to allocate parser.\n"); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + shader_desc = &sm6->p.shader_desc; + shader_desc->is_dxil = true; + if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, + shader_desc)) < 0) + { + WARN("Failed to extract shader, vkd3d result %d.\n", ret); + vkd3d_free(sm6); + return ret; + } + + sm6->p.shader_desc = *shader_desc; + shader_desc = &sm6->p.shader_desc; + + if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) + { + /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC + * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ + if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) + ERR("Failed to allocate aligned chunk. Unaligned access will occur.\n"); + else + memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); + } + + ret = sm6_parser_init(sm6, byte_code ? 
byte_code : shader_desc->byte_code, shader_desc->byte_code_size, + compile_info->source_name, message_context); + vkd3d_free(byte_code); + + if (ret < 0) + { + WARN("Failed to initialise shader parser.\n"); + sm6_parser_destroy(&sm6->p); + return ret; + } + + *parser = &sm6->p; + + return ret; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index ba5bcfbfaf0..4e9af15c1be 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -1432,7 +1432,7 @@ struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *v } struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, - const struct vkd3d_shader_location *loc) + struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc) { struct hlsl_ir_jump *jump; @@ -1440,6 +1440,7 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type return NULL; init_node(&jump->node, HLSL_IR_JUMP, NULL, loc); jump->type = type; + hlsl_src_from_node(&jump->condition, condition); return &jump->node; } @@ -1585,9 +1586,9 @@ static struct hlsl_ir_node *clone_if(struct hlsl_ctx *ctx, struct clone_instr_ma return dst; } -static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct hlsl_ir_jump *src) +static struct hlsl_ir_node *clone_jump(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_jump *src) { - return hlsl_new_jump(ctx, src->type, &src->node.loc); + return hlsl_new_jump(ctx, src->type, map_instr(map, src->condition.node), &src->node.loc); } static struct hlsl_ir_node *clone_load(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_load *src) @@ -1728,7 +1729,7 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, return clone_index(ctx, map, hlsl_ir_index(instr)); case HLSL_IR_JUMP: - return clone_jump(ctx, hlsl_ir_jump(instr)); + return clone_jump(ctx, map, hlsl_ir_jump(instr)); case HLSL_IR_LOAD: return clone_load(ctx, map, hlsl_ir_load(instr)); @@ -2123,18 +2124,18 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) { static const char * const names[] = { - "HLSL_IR_CALL", - "HLSL_IR_CONSTANT", - "HLSL_IR_EXPR", - "HLSL_IR_IF", - "HLSL_IR_INDEX", - "HLSL_IR_LOAD", - "HLSL_IR_LOOP", - "HLSL_IR_JUMP", - "HLSL_IR_RESOURCE_LOAD", - "HLSL_IR_RESOURCE_STORE", - "HLSL_IR_STORE", - "HLSL_IR_SWIZZLE", + [HLSL_IR_CALL ] = "HLSL_IR_CALL", + [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", + [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", + [HLSL_IR_IF ] = "HLSL_IR_IF", + [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", + [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", + [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", + [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", + [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", + [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", + [HLSL_IR_STORE ] = "HLSL_IR_STORE", + [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", }; if (type >= ARRAY_SIZE(names)) @@ -2146,10 +2147,11 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) { static const char * const names[] = { - "HLSL_IR_JUMP_BREAK", - "HLSL_IR_JUMP_CONTINUE", - "HLSL_IR_JUMP_DISCARD", - "HLSL_IR_JUMP_RETURN", + [HLSL_IR_JUMP_BREAK] = "HLSL_IR_JUMP_BREAK", + [HLSL_IR_JUMP_CONTINUE] = "HLSL_IR_JUMP_CONTINUE", + [HLSL_IR_JUMP_DISCARD_NEG] = "HLSL_IR_JUMP_DISCARD_NEG", + [HLSL_IR_JUMP_DISCARD_NZ] = "HLSL_IR_JUMP_DISCARD_NZ", + [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", }; assert(type < ARRAY_SIZE(names)); @@ -2337,7 +2339,11 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_COS] = "cos", 
[HLSL_OP1_COS_REDUCED] = "cos_reduced", [HLSL_OP1_DSX] = "dsx", + [HLSL_OP1_DSX_COARSE] = "dsx_coarse", + [HLSL_OP1_DSX_FINE] = "dsx_fine", [HLSL_OP1_DSY] = "dsy", + [HLSL_OP1_DSY_COARSE] = "dsy_coarse", + [HLSL_OP1_DSY_FINE] = "dsy_fine", [HLSL_OP1_EXP2] = "exp2", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", @@ -2418,8 +2424,12 @@ static void dump_ir_jump(struct vkd3d_string_buffer *buffer, const struct hlsl_i vkd3d_string_buffer_printf(buffer, "continue"); break; - case HLSL_IR_JUMP_DISCARD: - vkd3d_string_buffer_printf(buffer, "discard"); + case HLSL_IR_JUMP_DISCARD_NEG: + vkd3d_string_buffer_printf(buffer, "discard_neg"); + break; + + case HLSL_IR_JUMP_DISCARD_NZ: + vkd3d_string_buffer_printf(buffer, "discard_nz"); break; case HLSL_IR_JUMP_RETURN: @@ -2703,6 +2713,7 @@ static void free_ir_if(struct hlsl_ir_if *if_node) static void free_ir_jump(struct hlsl_ir_jump *jump) { + hlsl_src_remove(&jump->condition); vkd3d_free(jump); } @@ -3127,8 +3138,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) for (bt = 0; bt <= HLSL_TYPE_LAST_SCALAR; ++bt) { - unsigned int n_variants = 0; const char *const *variants; + unsigned int n_variants; switch (bt) { @@ -3148,6 +3159,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) break; default: + n_variants = 0; + variants = NULL; break; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index bce48e94b24..17ac36a57c6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -502,7 +502,11 @@ enum hlsl_ir_expr_op HLSL_OP1_COS, HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ HLSL_OP1_DSX, + HLSL_OP1_DSX_COARSE, + HLSL_OP1_DSX_FINE, HLSL_OP1_DSY, + HLSL_OP1_DSY_COARSE, + HLSL_OP1_DSY_FINE, HLSL_OP1_EXP2, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, @@ -558,7 +562,8 @@ enum hlsl_ir_jump_type { HLSL_IR_JUMP_BREAK, HLSL_IR_JUMP_CONTINUE, - HLSL_IR_JUMP_DISCARD, + HLSL_IR_JUMP_DISCARD_NEG, + HLSL_IR_JUMP_DISCARD_NZ, HLSL_IR_JUMP_RETURN, }; @@ -566,6 +571,8 @@ struct hlsl_ir_jump { struct hlsl_ir_node node; enum hlsl_ir_jump_type type; + /* Argument used for HLSL_IR_JUMP_DISCARD_NZ and HLSL_IR_JUMP_DISCARD_NEG. */ + struct hlsl_src condition; }; struct hlsl_ir_swizzle @@ -803,7 +810,11 @@ struct hlsl_ctx * Only used for SM1 profiles. */ struct hlsl_constant_defs { - struct hlsl_vec4 *values; + struct hlsl_constant_register + { + uint32_t index; + struct hlsl_vec4 value; + } *regs; size_t count, size; } constant_defs; /* Number of temp. registers required for the shader to run, i.e. 
the largest temp register @@ -1120,7 +1131,7 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, - enum hlsl_ir_jump_type type, const struct vkd3d_shader_location *loc); + enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); @@ -1132,6 +1143,8 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_load_component(struct hlsl_ctx *ctx, struct hlsl_block *block, const struct hlsl_deref *deref, unsigned int comp, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, + struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_simple_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *lhs, struct hlsl_ir_node *rhs); struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hlsl_deref *lhs, diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 0e07fe578e1..42fa2129e40 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -53,7 +53,7 @@ struct parse_initializer { struct hlsl_ir_node **args; unsigned int args_count; - struct list *instrs; + struct hlsl_block *instrs; bool braces; }; @@ -73,6 +73,10 @@ struct parse_variable_def struct hlsl_semantic semantic; struct hlsl_reg_reservation reg_reservation; struct parse_initializer initializer; + + struct hlsl_type *basic_type; + unsigned int modifiers; + struct vkd3d_shader_location modifiers_loc; }; struct parse_function @@ -85,8 +89,8 @@ struct parse_function struct parse_if_body { - struct list *then_block; - struct list *else_block; + struct hlsl_block *then_block; + struct hlsl_block *else_block; }; enum parse_assign_op @@ -134,6 +138,36 @@ static struct hlsl_ir_node *node_from_list(struct list *list) return LIST_ENTRY(list_tail(list), struct hlsl_ir_node, entry); } +static struct hlsl_ir_node *node_from_block(struct hlsl_block *block) +{ + return LIST_ENTRY(list_tail(&block->instrs), struct hlsl_ir_node, entry); +} + +static struct list *block_to_list(struct hlsl_block *block) +{ + /* This is a temporary hack to ease the transition from lists to blocks. + * It takes advantage of the fact that an allocated hlsl_block pointer is + * byte-compatible with an allocated list pointer. */ + return &block->instrs; +} + +static struct hlsl_block *list_to_block(struct list *list) +{ + /* This is a temporary hack to ease the transition from lists to blocks. + * It takes advantage of the fact that an allocated hlsl_block pointer is + * byte-compatible with an allocated list pointer. 
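+     * (Sketch of why this works, assuming struct hlsl_block keeps its struct list
+     * instrs member at offset zero: the CONTAINING_RECORD() below is then in practice
+     * just a pointer cast, and block_to_list() above relies on the same layout.)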
*/ + return CONTAINING_RECORD(list, struct hlsl_block, instrs); +} + +static struct hlsl_block *make_empty_block(struct hlsl_ctx *ctx) +{ + struct hlsl_block *block; + + if ((block = hlsl_alloc(ctx, sizeof(*block)))) + hlsl_block_init(block); + return block; +} + static struct list *make_empty_list(struct hlsl_ctx *ctx) { struct list *list; @@ -149,6 +183,12 @@ static void destroy_instr_list(struct list *list) vkd3d_free(list); } +static void destroy_block(struct hlsl_block *block) +{ + hlsl_block_cleanup(block); + vkd3d_free(block); +} + static bool hlsl_types_are_componentwise_compatible(struct hlsl_ctx *ctx, struct hlsl_type *src, struct hlsl_type *dst) { @@ -273,9 +313,6 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ return hlsl_types_are_componentwise_equal(ctx, src, dst); } -static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, - unsigned int comp, const struct vkd3d_shader_location *loc); - static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *node, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) { @@ -333,7 +370,7 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct list *instrs, dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx); - if (!(component_load = add_load_component(ctx, instrs, node, src_idx, loc))) + if (!(component_load = hlsl_add_load_component(ctx, instrs, node, src_idx, loc))) return NULL; if (!(cast = hlsl_new_cast(ctx, component_load, dst_comp_type, loc))) @@ -405,29 +442,29 @@ static DWORD add_modifiers(struct hlsl_ctx *ctx, DWORD modifiers, DWORD mod, return modifiers | mod; } -static bool append_conditional_break(struct hlsl_ctx *ctx, struct list *cond_list) +static bool append_conditional_break(struct hlsl_ctx *ctx, struct hlsl_block *cond_block) { struct hlsl_ir_node *condition, *not, *iff, *jump; struct hlsl_block then_block; /* E.g. "for (i = 0; ; ++i)". 
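+     * In that case there is no exit test to synthesize. Otherwise the condition block
+     * is extended below so that it effectively ends with
+     *     if (!<condition>) break;
+     * which is what makes the loop terminate.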
*/ - if (list_empty(cond_list)) + if (list_empty(&cond_block->instrs)) return true; - condition = node_from_list(cond_list); + condition = node_from_block(cond_block); if (!(not = hlsl_new_unary_expr(ctx, HLSL_OP1_LOGIC_NOT, condition, &condition->loc))) return false; - list_add_tail(cond_list, ¬->entry); + hlsl_block_add_instr(cond_block, not); hlsl_block_init(&then_block); - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &condition->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &condition->loc))) return false; hlsl_block_add_instr(&then_block, jump); if (!(iff = hlsl_new_if(ctx, not, &then_block, NULL, &condition->loc))) return false; - list_add_tail(cond_list, &iff->entry); + hlsl_block_add_instr(cond_block, iff); return true; } @@ -454,10 +491,10 @@ static bool attribute_list_has_duplicates(const struct parse_attribute_list *att return false; } -static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct list *init, struct list *cond, - struct list *iter, struct list *body, const struct vkd3d_shader_location *loc) +static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, + const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, + struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { - struct hlsl_block body_block; struct hlsl_ir_node *loop; unsigned int i; @@ -476,7 +513,7 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const } else { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented.\n"); + hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); } } else if (!strcmp(attr->name, "loop") @@ -491,38 +528,34 @@ static struct list *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const } } - if (!init && !(init = make_empty_list(ctx))) + if (!init && !(init = make_empty_block(ctx))) goto oom; if (!append_conditional_break(ctx, cond)) goto oom; - hlsl_block_init(&body_block); - - if (type != LOOP_DO_WHILE) - list_move_tail(&body_block.instrs, cond); - - list_move_tail(&body_block.instrs, body); - if (iter) - list_move_tail(&body_block.instrs, iter); + hlsl_block_add_block(body, iter); if (type == LOOP_DO_WHILE) - list_move_tail(&body_block.instrs, cond); + list_move_tail(&body->instrs, &cond->instrs); + else + list_move_head(&body->instrs, &cond->instrs); - if (!(loop = hlsl_new_loop(ctx, &body_block, loc))) + if (!(loop = hlsl_new_loop(ctx, body, loc))) goto oom; - list_add_tail(init, &loop->entry); + hlsl_block_add_instr(init, loop); - vkd3d_free(cond); - vkd3d_free(body); + destroy_block(cond); + destroy_block(body); + destroy_block(iter); return init; oom: - destroy_instr_list(init); - destroy_instr_list(cond); - destroy_instr_list(iter); - destroy_instr_list(body); + destroy_block(init); + destroy_block(cond); + destroy_block(iter); + destroy_block(body); return NULL; } @@ -539,7 +572,7 @@ static unsigned int initializer_size(const struct parse_initializer *initializer static void free_parse_initializer(struct parse_initializer *initializer) { - destroy_instr_list(initializer->instrs); + destroy_block(initializer->instrs); vkd3d_free(initializer->args); } @@ -625,7 +658,7 @@ static struct hlsl_ir_node *get_swizzle(struct hlsl_ctx *ctx, struct hlsl_ir_nod return NULL; } -static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, +static bool 
add_return(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *return_value, const struct vkd3d_shader_location *loc) { struct hlsl_type *return_type = ctx->cur_function->return_type; @@ -637,7 +670,7 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, { struct hlsl_ir_node *store; - if (!(return_value = add_implicit_conversion(ctx, instrs, return_value, return_type, loc))) + if (!(return_value = add_implicit_conversion(ctx, block_to_list(block), return_value, return_type, loc))) return false; if (!(store = hlsl_new_simple_store(ctx, ctx->cur_function->return_var, return_value))) @@ -656,14 +689,14 @@ static bool add_return(struct hlsl_ctx *ctx, struct list *instrs, hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RETURN, "Void functions cannot return a value."); } - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_RETURN, NULL, loc))) return false; - list_add_tail(instrs, &jump->entry); + hlsl_block_add_instr(block, jump); return true; } -static struct hlsl_ir_node *add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, +struct hlsl_ir_node *hlsl_add_load_component(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_ir_node *var_instr, unsigned int comp, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *load, *store; @@ -704,7 +737,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct list *instrs, struct return true; } -static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc); @@ -830,6 +863,16 @@ static bool shader_is_sm_5_1(const struct hlsl_ctx *ctx) return ctx->profile->major_version == 5 && ctx->profile->minor_version >= 1; } +static bool shader_profile_version_ge(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return ctx->profile->major_version > major || (ctx->profile->major_version == major && ctx->profile->minor_version >= minor); +} + +static bool shader_profile_version_lt(const struct hlsl_ctx *ctx, unsigned int major, unsigned int minor) +{ + return !shader_profile_version_ge(ctx, major, minor); +} + static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, struct hlsl_type *type, unsigned int modifiers, struct list *defs) { @@ -1020,7 +1063,7 @@ static struct hlsl_reg_reservation parse_packoffset(struct hlsl_ctx *ctx, const struct hlsl_reg_reservation reservation = {0}; char *endptr; - if (ctx->profile->major_version < 4) + if (shader_profile_version_lt(ctx, 4, 0)) return reservation; reservation.offset_index = strtoul(reg_string + 1, &endptr, 10); @@ -1097,20 +1140,50 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str { struct hlsl_ir_constant *constant; struct hlsl_ir_node *node; + struct hlsl_block expr; unsigned int ret = 0; bool progress; - if (!add_implicit_conversion(ctx, &block->instrs, node_from_list(&block->instrs), + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + switch (node->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_SWIZZLE: + case HLSL_IR_LOAD: + case HLSL_IR_INDEX: + continue; + case HLSL_IR_CALL: + case HLSL_IR_IF: + case HLSL_IR_LOOP: + case HLSL_IR_JUMP: + case HLSL_IR_RESOURCE_LOAD: + case 
HLSL_IR_RESOURCE_STORE: + case HLSL_IR_STORE: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + } + } + + if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) + return 0; + hlsl_block_add_block(&expr, block); + + if (!add_implicit_conversion(ctx, &expr.instrs, node_from_list(&expr.instrs), hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) + { + hlsl_block_cleanup(&expr); return 0; + } do { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL); - progress |= hlsl_copy_propagation_execute(ctx, block); + progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, &expr, NULL); + progress |= hlsl_copy_propagation_execute(ctx, &expr); } while (progress); - node = node_from_list(&block->instrs); + node = node_from_list(&expr.instrs); if (node->type == HLSL_IR_CONSTANT) { constant = hlsl_ir_constant(node); @@ -1119,9 +1192,11 @@ static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, str else { hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Failed to evaluate constant expression %d.", node->type); + "Failed to evaluate constant expression."); } + hlsl_block_cleanup(&expr); + return ret; } @@ -1284,7 +1359,7 @@ static struct hlsl_ir_node *add_expr(struct hlsl_ctx *ctx, struct list *instrs, { if (operands[j]) { - if (!(load = add_load_component(ctx, instrs, operands[j], i, loc))) + if (!(load = hlsl_add_load_component(ctx, instrs, operands[j], i, loc))) return NULL; cell_operands[j] = load; @@ -1334,12 +1409,12 @@ static void check_integer_type(struct hlsl_ctx *ctx, const struct hlsl_ir_node * } } -static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_unary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {arg}; - return add_expr(ctx, instrs, op, args, arg->data_type, loc); + return add_expr(ctx, block_to_list(block), op, args, arg->data_type, loc); } static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, @@ -1347,7 +1422,7 @@ static struct hlsl_ir_node *add_unary_bitwise_expr(struct hlsl_ctx *ctx, struct { check_integer_type(ctx, arg); - return add_unary_arithmetic_expr(ctx, instrs, op, arg, loc); + return add_unary_arithmetic_expr(ctx, list_to_block(instrs), op, arg, loc); } static struct hlsl_ir_node *add_unary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, @@ -1378,7 +1453,7 @@ static struct hlsl_type *get_common_numeric_type(struct hlsl_ctx *ctx, const str return hlsl_get_numeric_type(ctx, type, base, dimx, dimy); } -static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { @@ -1387,24 +1462,13 @@ static struct hlsl_ir_node *add_binary_arithmetic_expr(struct hlsl_ctx *ctx, str common_type = get_common_numeric_type(ctx, arg1, arg2, loc); - if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block_to_list(block), arg1, common_type, loc))) return NULL; - if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = 
add_implicit_conversion(ctx, block_to_list(block), arg2, common_type, loc))) return NULL; - return add_expr(ctx, instrs, op, args, common_type, loc); -} - -static struct list *add_binary_arithmetic_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_arithmetic_expr(ctx, list1, op, arg1, arg2, loc); - return list1; + return add_expr(ctx, block_to_list(block), op, args, common_type, loc); } static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct list *instrs, @@ -1414,19 +1478,7 @@ static struct hlsl_ir_node *add_binary_bitwise_expr(struct hlsl_ctx *ctx, struct check_integer_type(ctx, arg1); check_integer_type(ctx, arg2); - return add_binary_arithmetic_expr(ctx, instrs, op, arg1, arg2, loc); -} - -static struct list *add_binary_bitwise_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; + return add_binary_arithmetic_expr(ctx, list_to_block(instrs), op, arg1, arg2, loc); } static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, struct list *instrs, @@ -1454,17 +1506,6 @@ static struct hlsl_ir_node *add_binary_comparison_expr(struct hlsl_ctx *ctx, str return add_expr(ctx, instrs, op, args, return_type, loc); } -static struct list *add_binary_comparison_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); - return list1; -} - static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct list *instrs, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) @@ -1488,18 +1529,6 @@ static struct hlsl_ir_node *add_binary_logical_expr(struct hlsl_ctx *ctx, struct return add_expr(ctx, instrs, op, args, common_type, loc); } -static struct list *add_binary_logical_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct list *instrs, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) @@ -1531,19 +1560,7 @@ static struct hlsl_ir_node *add_binary_shift_expr(struct hlsl_ctx *ctx, struct l return add_expr(ctx, instrs, op, args, return_type, loc); } -static struct list *add_binary_shift_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, - enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = 
node_from_list(list2); - - list_move_tail(list1, list2); - vkd3d_free(list2); - add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); - - return list1; -} - -static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct list *instrs, +static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { enum hlsl_base_type base = expr_common_base_type(arg1->data_type->base_type, arg2->data_type->base_type); @@ -1557,8 +1574,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis struct vkd3d_string_buffer *string; if ((string = hlsl_type_to_string(ctx, arg1->data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid type %s.\n", string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); hlsl_release_string_buffer(ctx, string); return NULL; } @@ -1568,8 +1584,7 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis struct vkd3d_string_buffer *string; if ((string = hlsl_type_to_string(ctx, arg2->data_type))) - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Invalid type %s.\n", string->buffer); + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid type %s.", string->buffer); hlsl_release_string_buffer(ctx, string); return NULL; } @@ -1589,13 +1604,60 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct lis common_type = hlsl_get_vector_type(ctx, base, dim); ret_type = hlsl_get_scalar_type(ctx, base); - if (!(args[0] = add_implicit_conversion(ctx, instrs, arg1, common_type, loc))) + if (!(args[0] = add_implicit_conversion(ctx, block_to_list(instrs), arg1, common_type, loc))) return NULL; - if (!(args[1] = add_implicit_conversion(ctx, instrs, arg2, common_type, loc))) + if (!(args[1] = add_implicit_conversion(ctx, block_to_list(instrs), arg2, common_type, loc))) return NULL; - return add_expr(ctx, instrs, op, args, ret_type, loc); + return add_expr(ctx, block_to_list(instrs), op, args, ret_type, loc); +} + +static struct list *add_binary_expr_merge(struct hlsl_ctx *ctx, struct list *list1, struct list *list2, + enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg1 = node_from_list(list1), *arg2 = node_from_list(list2); + + list_move_tail(list1, list2); + vkd3d_free(list2); + + switch (op) + { + case HLSL_OP2_ADD: + case HLSL_OP2_DIV: + case HLSL_OP2_MOD: + case HLSL_OP2_MUL: + add_binary_arithmetic_expr(ctx, list_to_block(list1), op, arg1, arg2, loc); + break; + + case HLSL_OP2_BIT_AND: + case HLSL_OP2_BIT_OR: + case HLSL_OP2_BIT_XOR: + add_binary_bitwise_expr(ctx, list1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LESS: + case HLSL_OP2_GEQUAL: + case HLSL_OP2_EQUAL: + case HLSL_OP2_NEQUAL: + add_binary_comparison_expr(ctx, list1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LOGIC_AND: + case HLSL_OP2_LOGIC_OR: + add_binary_logical_expr(ctx, list1, op, arg1, arg2, loc); + break; + + case HLSL_OP2_LSHIFT: + case HLSL_OP2_RSHIFT: + add_binary_shift_expr(ctx, list1, op, arg1, arg2, loc); + break; + + default: + vkd3d_unreachable(); + } + + return list1; } static enum hlsl_ir_expr_op op_from_assignment(enum parse_assign_op op) @@ -1663,7 +1725,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in if (assign_op == ASSIGN_OP_SUB) { - if (!(rhs = add_unary_arithmetic_expr(ctx, instrs, 
HLSL_OP1_NEG, rhs, &rhs->loc))) + if (!(rhs = add_unary_arithmetic_expr(ctx, list_to_block(instrs), HLSL_OP1_NEG, rhs, &rhs->loc))) return NULL; assign_op = ASSIGN_OP_ADD; } @@ -1672,7 +1734,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in enum hlsl_ir_expr_op op = op_from_assignment(assign_op); assert(op); - if (!(rhs = add_binary_arithmetic_expr(ctx, instrs, op, lhs, rhs, &rhs->loc))) + if (!(rhs = add_binary_arithmetic_expr(ctx, list_to_block(instrs), op, lhs, rhs, &rhs->loc))) return NULL; } @@ -1779,7 +1841,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct list *in return NULL; list_add_tail(instrs, &cell->entry); - if (!(load = add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) + if (!(load = hlsl_add_load_component(ctx, instrs, rhs, k++, &rhs->loc))) return NULL; if (!hlsl_init_deref_from_index_chain(ctx, &deref, cell)) @@ -1853,7 +1915,7 @@ static bool add_increment(struct hlsl_ctx *ctx, struct list *instrs, bool decrem return true; } -static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, +static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) { unsigned int src_comp_count = hlsl_type_component_count(src->data_type); @@ -1868,17 +1930,17 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct list *instrs, struct hlsl_type *dst_comp_type; struct hlsl_block block; - if (!(load = add_load_component(ctx, instrs, src, k, &src->loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(instrs), src, k, &src->loc))) return; dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) + if (!(conv = add_implicit_conversion(ctx, block_to_list(instrs), load, dst_comp_type, &src->loc))) return; if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) return; - list_move_tail(instrs, &block.instrs); + hlsl_block_add_block(instrs, &block); ++*store_index; } @@ -1924,211 +1986,234 @@ static bool type_has_numeric_components(struct hlsl_type *type) return false; } -static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_type, - unsigned int modifiers, const struct vkd3d_shader_location *modifiers_loc, struct list *var_list) +static void check_invalid_in_out_modifiers(struct hlsl_ctx *ctx, unsigned int modifiers, + const struct vkd3d_shader_location *loc) { - struct parse_variable_def *v, *v_next; + modifiers &= (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); + if (modifiers) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); + hlsl_release_string_buffer(ctx, string); + } +} + +static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) +{ + struct hlsl_type *basic_type = v->basic_type; struct hlsl_ir_function_decl *func; - unsigned int invalid_modifiers; - struct list *statements_list; + struct hlsl_semantic new_semantic; + uint32_t modifiers = v->modifiers; + bool unbounded_res_array = false; struct hlsl_ir_var *var; struct hlsl_type *type; bool local = true; + char *var_name; + unsigned int i; + + assert(basic_type); if (basic_type->class == HLSL_CLASS_MATRIX) assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); - if 
(!(statements_list = make_empty_list(ctx))) - { - LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) - free_parse_variable_def(v); - vkd3d_free(var_list); - return NULL; - } - - if (!var_list) - return statements_list; + type = basic_type; - invalid_modifiers = modifiers & (HLSL_STORAGE_IN | HLSL_STORAGE_OUT); - if (invalid_modifiers) + if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) { - struct vkd3d_string_buffer *string; - - if ((string = hlsl_modifiers_to_string(ctx, invalid_modifiers))) - hlsl_error(ctx, modifiers_loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers '%s' are not allowed on non-parameter variables.", string->buffer); - hlsl_release_string_buffer(ctx, string); + for (i = 0; i < v->arrays.count; ++i) + unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); } - LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + if (unbounded_res_array) { - bool unbounded_res_array = false; - unsigned int i; - - type = basic_type; - - if (shader_is_sm_5_1(ctx) && type->class == HLSL_CLASS_OBJECT) + if (v->arrays.count == 1) { - for (i = 0; i < v->arrays.count; ++i) - unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); + hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); + return; } - - if (unbounded_res_array) + else { - if (v->arrays.count == 1) - { - hlsl_fixme(ctx, &v->loc, "Unbounded resource arrays."); - free_parse_variable_def(v); - continue; - } - else - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Unbounded resource arrays cannot be multi-dimensional."); - } + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Unbounded resource arrays cannot be multi-dimensional."); } - else + } + else + { + for (i = 0; i < v->arrays.count; ++i) { - for (i = 0; i < v->arrays.count; ++i) + if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) { - if (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT) - { - unsigned int size = initializer_size(&v->initializer); - unsigned int elem_components = hlsl_type_component_count(type); - - if (i < v->arrays.count - 1) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Only innermost array size can be implicit."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else if (elem_components == 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Cannot declare an implicit size array of a size 0 type."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else if (size == 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Implicit size arrays need to be initialized."); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; + unsigned int size = initializer_size(&v->initializer); + unsigned int elem_components = hlsl_type_component_count(type); - } - else if (size % elem_components != 0) - { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Cannot initialize implicit size array with %u components, expected a multiple of %u.", - size, elem_components); - free_parse_initializer(&v->initializer); - v->initializer.args_count = 0; - } - else - { - v->arrays.sizes[i] = size / elem_components; - } + if (i < v->arrays.count - 1) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Only innermost array size can be implicit."); + v->initializer.args_count = 0; + } + else if 
(elem_components == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot declare an implicit size array of a size 0 type."); + v->initializer.args_count = 0; + } + else if (size == 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Implicit size arrays need to be initialized."); + v->initializer.args_count = 0; + } + else if (size % elem_components != 0) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Cannot initialize implicit size array with %u components, expected a multiple of %u.", + size, elem_components); + v->initializer.args_count = 0; + } + else + { + v->arrays.sizes[i] = size / elem_components; } - type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); } + type = hlsl_new_array_type(ctx, type, v->arrays.sizes[i]); } - vkd3d_free(v->arrays.sizes); - - if (!(var = hlsl_new_var(ctx, v->name, type, &v->loc, &v->semantic, modifiers, &v->reg_reservation))) - { - free_parse_variable_def(v); - continue; - } + } - var->buffer = ctx->cur_buffer; + if (!(var_name = vkd3d_strdup(v->name))) + return; - if (var->buffer == ctx->globals_buffer) + new_semantic = v->semantic; + if (v->semantic.name) + { + if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) { - if (var->reg_reservation.offset_type) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "packoffset() is only allowed inside constant buffer declarations."); + vkd3d_free(var_name); + return; } + } - if (ctx->cur_scope == ctx->globals) - { - local = false; + if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) + { + hlsl_cleanup_semantic(&new_semantic); + vkd3d_free(var_name); + return; + } - if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); + var->buffer = ctx->cur_buffer; - /* Mark it as uniform. We need to do this here since synthetic - * variables also get put in the global scope, but shouldn't be - * considered uniforms, and we have no way of telling otherwise. 
*/ - if (!(modifiers & HLSL_STORAGE_STATIC)) - var->storage_modifiers |= HLSL_STORAGE_UNIFORM; + if (var->buffer == ctx->globals_buffer) + { + if (var->reg_reservation.offset_type) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "packoffset() is only allowed inside constant buffer declarations."); + } - if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && - type_has_object_components(var->data_type, true)) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Target profile doesn't support objects as struct members in uniform variables.\n"); - } + if (ctx->cur_scope == ctx->globals) + { + local = false; - if ((func = hlsl_get_func_decl(ctx, var->name))) - { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "'%s' is already defined as a function.", var->name); - hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, - "'%s' was previously defined here.", var->name); - } - } - else - { - static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED - | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; + if ((modifiers & HLSL_STORAGE_UNIFORM) && (modifiers & HLSL_STORAGE_STATIC)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Variable '%s' is declared as both \"uniform\" and \"static\".", var->name); - if (modifiers & invalid) - { - struct vkd3d_string_buffer *string; + /* Mark it as uniform. We need to do this here since synthetic + * variables also get put in the global scope, but shouldn't be + * considered uniforms, and we have no way of telling otherwise. */ + if (!(modifiers & HLSL_STORAGE_STATIC)) + var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers '%s' are not allowed on local variables.", string->buffer); - hlsl_release_string_buffer(ctx, string); - } - if (var->semantic.name) - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, - "Semantics are not allowed on local variables."); + if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM) && + type_has_object_components(var->data_type, true)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Target profile doesn't support objects as struct members in uniform variables."); } - if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) - && type_has_object_components(var->data_type, false)) + if ((func = hlsl_get_func_decl(ctx, var->name))) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Static variables cannot have both numeric and resource components."); + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "'%s' is already defined as a function.", var->name); + hlsl_note(ctx, &func->loc, VKD3D_SHADER_LOG_ERROR, + "'%s' was previously defined here.", var->name); } + } + else + { + static const unsigned int invalid = HLSL_STORAGE_EXTERN | HLSL_STORAGE_SHARED + | HLSL_STORAGE_GROUPSHARED | HLSL_STORAGE_UNIFORM; - if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count - && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) + if (modifiers & invalid) { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, - "Const variable \"%s\" is missing an initializer.", var->name); - hlsl_free_var(var); - free_parse_initializer(&v->initializer); - vkd3d_free(v); - continue; + 
struct vkd3d_string_buffer *string; + + if ((string = hlsl_modifiers_to_string(ctx, modifiers & invalid))) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers '%s' are not allowed on local variables.", string->buffer); + hlsl_release_string_buffer(ctx, string); } + if (var->semantic.name) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, + "Semantics are not allowed on local variables."); + } + + if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) + && type_has_object_components(var->data_type, false)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Static variables cannot have both numeric and resource components."); + } + + if ((type->modifiers & HLSL_MODIFIER_CONST) && !v->initializer.args_count + && !(modifiers & (HLSL_STORAGE_STATIC | HLSL_STORAGE_UNIFORM))) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, + "Const variable \"%s\" is missing an initializer.", var->name); + hlsl_free_var(var); + return; + } + + if (!hlsl_add_var(ctx, var, local)) + { + struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); - if (!hlsl_add_var(ctx, var, local)) + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, + "Variable \"%s\" was already declared in this scope.", var->name); + hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); + hlsl_free_var(var); + return; + } +} + +static struct list *initialize_vars(struct hlsl_ctx *ctx, struct list *var_list) +{ + struct parse_variable_def *v, *v_next; + struct list *statements_list; + struct hlsl_ir_var *var; + struct hlsl_type *type; + + if (!(statements_list = make_empty_list(ctx))) + { + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) { - struct hlsl_ir_var *old = hlsl_get_var(ctx->cur_scope, var->name); + free_parse_variable_def(v); + } + vkd3d_free(var_list); + return NULL; + } - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_REDEFINED, - "Variable \"%s\" was already declared in this scope.", var->name); - hlsl_note(ctx, &old->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously declared here.", old->name); - hlsl_free_var(var); - free_parse_initializer(&v->initializer); - vkd3d_free(v); + LIST_FOR_EACH_ENTRY_SAFE(v, v_next, var_list, struct parse_variable_def, entry) + { + /* If this fails, the variable failed to be declared. 
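+         * The declaration was then already rejected by declare_var() above, so its
+         * initializer is simply dropped here.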
*/ + if (!(var = hlsl_get_var(ctx->cur_scope, v->name))) + { + free_parse_variable_def(v); continue; } + type = var->data_type; if (v->initializer.args_count) { @@ -2143,8 +2228,7 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, "Expected %u components in initializer, but got %u.", hlsl_type_component_count(type), size); - free_parse_initializer(&v->initializer); - vkd3d_free(v); + free_parse_variable_def(v); continue; } @@ -2159,16 +2243,14 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); assert(v->initializer.args_count == 1); - list_add_tail(v->initializer.instrs, &load->node.entry); - add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); + hlsl_block_add_instr(v->initializer.instrs, &load->node); + add_assignment(ctx, block_to_list(v->initializer.instrs), &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); } - if (modifiers & HLSL_STORAGE_STATIC) - list_move_tail(&ctx->static_initializers.instrs, v->initializer.instrs); + if (var->storage_modifiers & HLSL_STORAGE_STATIC) + hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); else - list_move_tail(statements_list, v->initializer.instrs); - vkd3d_free(v->initializer.args); - vkd3d_free(v->initializer.instrs); + list_move_tail(statements_list, &v->initializer.instrs->instrs); } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -2178,32 +2260,33 @@ static struct list *declare_vars(struct hlsl_ctx *ctx, struct hlsl_type *basic_t if (type_has_object_components(var->data_type, false)) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } if (!(zero = hlsl_new_uint_constant(ctx, 0, &var->loc))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } hlsl_block_add_instr(&ctx->static_initializers, zero); if (!(cast = add_cast(ctx, &ctx->static_initializers.instrs, zero, var->data_type, &var->loc))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } if (!(store = hlsl_new_simple_store(ctx, var, cast))) { - vkd3d_free(v); + free_parse_variable_def(v); continue; } hlsl_block_add_instr(&ctx->static_initializers, store); } - vkd3d_free(v); + free_parse_variable_def(v); } + vkd3d_free(var_list); return statements_list; } @@ -2286,7 +2369,7 @@ static struct hlsl_ir_node *intrinsic_float_convert_arg(struct hlsl_ctx *ctx, return arg; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - return add_implicit_conversion(ctx, params->instrs, arg, type, loc); + return add_implicit_conversion(ctx, block_to_list(params->instrs), arg, type, loc); } static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *params, @@ -2298,7 +2381,7 @@ static bool convert_args(struct hlsl_ctx *ctx, const struct parse_initializer *p { struct hlsl_ir_node *new_arg; - if (!(new_arg = add_implicit_conversion(ctx, params->instrs, params->args[i], type, loc))) + if (!(new_arg = add_implicit_conversion(ctx, block_to_list(params->instrs), params->args[i], type, loc))) return false; params->args[i] = new_arg; } @@ -2394,25 +2477,25 @@ static bool intrinsic_all(struct hlsl_ctx *ctx, if (!(one = hlsl_new_float_constant(ctx, 1.0f, loc))) return false; - list_add_tail(params->instrs, &one->entry); + hlsl_block_add_instr(params->instrs, one); if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - 
list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero); mul = one; count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) return false; if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, load, mul, loc))) return false; } - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, mul, zero, loc); + return !!add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_NEQUAL, mul, zero, loc); } static bool intrinsic_any(struct hlsl_ctx *ctx, @@ -2431,28 +2514,28 @@ static bool intrinsic_any(struct hlsl_ctx *ctx, { if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero); if (!(dot = add_binary_dot_expr(ctx, params->instrs, arg, arg, loc))) return false; - return !!add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_NEQUAL, dot, zero, loc); + return !!add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_NEQUAL, dot, zero, loc); } else if (arg->data_type->base_type == HLSL_TYPE_BOOL) { if (!(bfalse = hlsl_new_bool_constant(ctx, false, loc))) return false; - list_add_tail(params->instrs, &bfalse->entry); + hlsl_block_add_instr(params->instrs, bfalse); or = bfalse; count = hlsl_type_component_count(arg->data_type); for (i = 0; i < count; ++i) { - if (!(load = add_load_component(ctx, params->instrs, arg, i, loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, i, loc))) return false; - if (!(or = add_binary_bitwise_expr(ctx, params->instrs, HLSL_OP2_BIT_OR, or, load, loc))) + if (!(or = add_binary_bitwise_expr(ctx, block_to_list(params->instrs), HLSL_OP2_BIT_OR, or, load, loc))) return false; } @@ -2491,7 +2574,7 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_FLOAT); operands[0] = params->args[0]; - return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); + return add_expr(ctx, block_to_list(params->instrs), HLSL_OP1_REINTERPRET, operands, data_type, loc); } static bool intrinsic_asuint(struct hlsl_ctx *ctx, @@ -2527,7 +2610,7 @@ static bool intrinsic_asuint(struct hlsl_ctx *ctx, data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_UINT); operands[0] = params->args[0]; - return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); + return add_expr(ctx, block_to_list(params->instrs), HLSL_OP1_REINTERPRET, operands, data_type, loc); } static bool intrinsic_clamp(struct hlsl_ctx *ctx, @@ -2544,6 +2627,34 @@ static bool intrinsic_clamp(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MIN, max, params->args[2], loc); } +static bool intrinsic_clip(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *condition, *jump; + + if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) + return false; + + condition = params->args[0]; + + if (ctx->profile->major_version < 4 && hlsl_type_component_count(condition->data_type) > 4) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, condition->data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Argument type cannot exceed 4 components, 
got type \"%s\".", string->buffer); + hlsl_release_string_buffer(ctx, string); + return false; + } + + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NEG, condition, loc))) + return false; + hlsl_block_add_instr(params->instrs, jump); + + return true; +} + static bool intrinsic_cos(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2571,34 +2682,34 @@ static bool intrinsic_cross(struct hlsl_ctx *ctx, cast_type = hlsl_get_vector_type(ctx, base, 3); - if (!(arg1_cast = add_implicit_conversion(ctx, params->instrs, arg1, cast_type, loc))) + if (!(arg1_cast = add_implicit_conversion(ctx, block_to_list(params->instrs), arg1, cast_type, loc))) return false; - if (!(arg2_cast = add_implicit_conversion(ctx, params->instrs, arg2, cast_type, loc))) + if (!(arg2_cast = add_implicit_conversion(ctx, block_to_list(params->instrs), arg2, cast_type, loc))) return false; if (!(arg1_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl1->entry); + hlsl_block_add_instr(params->instrs, arg1_swzl1); if (!(arg2_swzl1 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl1->entry); + hlsl_block_add_instr(params->instrs, arg2_swzl1); if (!(mul1 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl1, arg2_swzl1, loc))) return false; if (!(mul1_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, mul1, loc))) return false; - list_add_tail(params->instrs, &mul1_neg->entry); + hlsl_block_add_instr(params->instrs, mul1_neg); if (!(arg1_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Y, Z, X, Y), 3, arg1_cast, loc))) return false; - list_add_tail(params->instrs, &arg1_swzl2->entry); + hlsl_block_add_instr(params->instrs, arg1_swzl2); if (!(arg2_swzl2 = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, X, Y, Z), 3, arg2_cast, loc))) return false; - list_add_tail(params->instrs, &arg2_swzl2->entry); + hlsl_block_add_instr(params->instrs, arg2_swzl2); if (!(mul2 = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg1_swzl2, arg2_swzl2, loc))) return false; @@ -2617,6 +2728,28 @@ static bool intrinsic_ddx(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX, arg, loc); } +static bool intrinsic_ddx_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_COARSE, arg, loc); +} + +static bool intrinsic_ddx_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSX_FINE, arg, loc); +} + static bool intrinsic_ddy(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2628,6 +2761,28 @@ static bool intrinsic_ddy(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY, arg, loc); } +static bool intrinsic_ddy_coarse(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = 
intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_COARSE, arg, loc); +} + +static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2668,7 +2823,7 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, /* 1/ln(2) */ if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) return false; - list_add_tail(params->instrs, &coeff->entry); + hlsl_block_add_instr(params->instrs, coeff); if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) return false; @@ -2715,7 +2870,7 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer if (!(zero = hlsl_new_constant(ctx, div->data_type, &zero_value, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero); if (!(abs = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_ABS, div, loc))) return false; @@ -2726,10 +2881,10 @@ static bool intrinsic_fmod(struct hlsl_ctx *ctx, const struct parse_initializer if (!(neg_frac = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, frac, loc))) return false; - if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, div, zero, loc))) + if (!(ge = add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_GEQUAL, div, zero, loc))) return false; - if (!(select = hlsl_add_conditional(ctx, params->instrs, ge, frac, neg_frac))) + if (!(select = hlsl_add_conditional(ctx, block_to_list(params->instrs), ge, frac, neg_frac))) return false; return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, select, y, loc); @@ -2806,7 +2961,7 @@ static bool intrinsic_lerp(struct hlsl_ctx *ctx, } static struct hlsl_ir_node * add_pow_expr(struct hlsl_ctx *ctx, - struct list *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, + struct hlsl_block *instrs, struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *log, *mul; @@ -2861,15 +3016,15 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, init_value.u[3].f = 1.0f; if (!(init = hlsl_new_constant(ctx, ret_type, &init_value, loc))) return false; - list_add_tail(params->instrs, &init->entry); + hlsl_block_add_instr(params->instrs, init); if (!(store = hlsl_new_simple_store(ctx, var, init))) return false; - list_add_tail(params->instrs, &store->entry); + hlsl_block_add_instr(params->instrs, store); if (!(zero = hlsl_new_float_constant(ctx, 0.0f, loc))) return false; - list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero); /* Diffuse component. */ if (!(diffuse = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MAX, n_l, zero, loc))) @@ -2877,31 +3032,31 @@ static bool intrinsic_lit(struct hlsl_ctx *ctx, if (!hlsl_new_store_component(ctx, &block, &var_deref, 1, diffuse)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); /* Specular component. 
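      * The value stored below is effectively (n_l < 0 || n_h < 0) ? 0 : pow(n_h, m),
      * i.e. the usual lit() specular term.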
*/ - if (!(n_h_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_h, zero, loc))) + if (!(n_h_neg = add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_LESS, n_h, zero, loc))) return false; - if (!(n_l_neg = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, n_l, zero, loc))) + if (!(n_l_neg = add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_LESS, n_l, zero, loc))) return false; - if (!(specular_or = add_binary_logical_expr(ctx, params->instrs, HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) + if (!(specular_or = add_binary_logical_expr(ctx, block_to_list(params->instrs), HLSL_OP2_LOGIC_OR, n_l_neg, n_h_neg, loc))) return false; if (!(specular_pow = add_pow_expr(ctx, params->instrs, n_h, m, loc))) return false; - if (!(load = hlsl_add_conditional(ctx, params->instrs, specular_or, zero, specular_pow))) + if (!(load = hlsl_add_conditional(ctx, block_to_list(params->instrs), specular_or, zero, specular_pow))) return false; if (!hlsl_new_store_component(ctx, &block, &var_deref, 2, load)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &var_load->node.entry); + hlsl_block_add_instr(params->instrs, &var_load->node); return true; } @@ -3013,10 +3168,10 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, ret_type = hlsl_get_scalar_type(ctx, base); } - if (!(cast1 = add_implicit_conversion(ctx, params->instrs, arg1, cast_type1, loc))) + if (!(cast1 = add_implicit_conversion(ctx, block_to_list(params->instrs), arg1, cast_type1, loc))) return false; - if (!(cast2 = add_implicit_conversion(ctx, params->instrs, arg2, cast_type2, loc))) + if (!(cast2 = add_implicit_conversion(ctx, block_to_list(params->instrs), arg2, cast_type2, loc))) return false; if (!(var = hlsl_new_synthetic_var(ctx, "mul", matrix_type, loc))) @@ -3034,10 +3189,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, { struct hlsl_ir_node *value1, *value2, *mul; - if (!(value1 = add_load_component(ctx, params->instrs, cast1, j * cast1->data_type->dimx + k, loc))) + if (!(value1 = hlsl_add_load_component(ctx, block_to_list(params->instrs), + cast1, j * cast1->data_type->dimx + k, loc))) return false; - if (!(value2 = add_load_component(ctx, params->instrs, cast2, k * cast2->data_type->dimx + i, loc))) + if (!(value2 = hlsl_add_load_component(ctx, block_to_list(params->instrs), + cast2, k * cast2->data_type->dimx + i, loc))) return false; if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, value1, value2, loc))) @@ -3056,15 +3213,15 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, if (!hlsl_new_store_component(ctx, &block, &var_deref, j * matrix_type->dimx + i, instr)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); } } if (!(load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &load->node.entry); + hlsl_block_add_instr(params->instrs, &load->node); - return !!add_implicit_conversion(ctx, params->instrs, &load->node, ret_type, loc); + return !!add_implicit_conversion(ctx, block_to_list(params->instrs), &load->node, ret_type, loc); } static bool intrinsic_normalize(struct hlsl_ctx *ctx, @@ -3169,22 +3326,22 @@ static bool intrinsic_sign(struct hlsl_ctx *ctx, if (!(zero = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, arg->data_type->base_type), &zero_value, loc))) return false; - 
list_add_tail(params->instrs, &zero->entry); + hlsl_block_add_instr(params->instrs, zero); /* Check if 0 < arg, cast bool to int */ - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, zero, arg, loc))) + if (!(lt = add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_LESS, zero, arg, loc))) return false; - if (!(op1 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + if (!(op1 = add_implicit_conversion(ctx, block_to_list(params->instrs), lt, int_type, loc))) return false; /* Check if arg < 0, cast bool to int and invert (meaning true is -1) */ - if (!(lt = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_LESS, arg, zero, loc))) + if (!(lt = add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_LESS, arg, zero, loc))) return false; - if (!(op2 = add_implicit_conversion(ctx, params->instrs, lt, int_type, loc))) + if (!(op2 = add_implicit_conversion(ctx, block_to_list(params->instrs), lt, int_type, loc))) return false; if (!(neg = add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_NEG, op2, loc))) @@ -3229,7 +3386,7 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(one = hlsl_new_float_constant(ctx, 1.0, loc))) return false; - list_add_tail(params->instrs, &one->entry); + hlsl_block_add_instr(params->instrs, one); if (!(p_denom = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_DIV, one, p_denom, loc))) return false; @@ -3242,11 +3399,11 @@ static bool intrinsic_smoothstep(struct hlsl_ctx *ctx, if (!(minus_two = hlsl_new_float_constant(ctx, -2.0, loc))) return false; - list_add_tail(params->instrs, &minus_two->entry); + hlsl_block_add_instr(params->instrs, minus_two); if (!(three = hlsl_new_float_constant(ctx, 3.0, loc))) return false; - list_add_tail(params->instrs, &three->entry); + hlsl_block_add_instr(params->instrs, three); if (!(res = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, minus_two, p, loc))) return false; @@ -3283,13 +3440,13 @@ static bool intrinsic_step(struct hlsl_ctx *ctx, if (!elementwise_intrinsic_float_convert_args(ctx, params, loc)) return false; - if (!(ge = add_binary_comparison_expr(ctx, params->instrs, HLSL_OP2_GEQUAL, + if (!(ge = add_binary_comparison_expr(ctx, block_to_list(params->instrs), HLSL_OP2_GEQUAL, params->args[1], params->args[0], loc))) return false; type = ge->data_type; type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - return !!add_implicit_conversion(ctx, params->instrs, ge, type, loc); + return !!add_implicit_conversion(ctx, block_to_list(params->instrs), ge, type, loc); } static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, @@ -3308,7 +3465,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (params->args_count == 4) { - hlsl_fixme(ctx, loc, "Samples with gradients are not implemented.\n"); + hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); } sampler_type = params->args[0]->data_type; @@ -3324,7 +3481,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); } - if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], + if (!(coords = add_implicit_conversion(ctx, block_to_list(params->instrs), params->args[1], hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) coords = params->args[1]; @@ -3335,7 +3492,7 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct 
parse_initializer * if (!(load = hlsl_new_resource_load(ctx, &load_params, loc))) return false; - list_add_tail(params->instrs, &load->entry); + hlsl_block_add_instr(params->instrs, load); return true; } @@ -3369,7 +3526,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, if ((string = hlsl_type_to_string(ctx, arg_type))) hlsl_error(ctx, &arg->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.\n", + "Wrong type for argument 1 of transpose(): expected a matrix or scalar type, but got '%s'.", string->buffer); hlsl_release_string_buffer(ctx, string); return false; @@ -3377,7 +3534,7 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, if (arg_type->class == HLSL_CLASS_SCALAR) { - list_add_tail(params->instrs, &arg->entry); + hlsl_block_add_instr(params->instrs, arg); return true; } @@ -3393,18 +3550,18 @@ static bool intrinsic_transpose(struct hlsl_ctx *ctx, { struct hlsl_block block; - if (!(load = add_load_component(ctx, params->instrs, arg, j * arg->data_type->dimx + i, loc))) + if (!(load = hlsl_add_load_component(ctx, block_to_list(params->instrs), arg, j * arg->data_type->dimx + i, loc))) return false; if (!hlsl_new_store_component(ctx, &block, &var_deref, i * var->data_type->dimx + j, load)) return false; - list_move_tail(params->instrs, &block.instrs); + hlsl_block_add_block(params->instrs, &block); } } if (!(var_load = hlsl_new_var_load(ctx, var, loc))) return false; - list_add_tail(params->instrs, &var_load->node.entry); + hlsl_block_add_instr(params->instrs, &var_load->node); return true; } @@ -3444,13 +3601,13 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(c = hlsl_new_float_constant(ctx, 255.0f + (0.5f / 256.0f), loc))) return false; - list_add_tail(params->instrs, &c->entry); + hlsl_block_add_instr(params->instrs, c); if (arg_type->class == HLSL_CLASS_VECTOR) { if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(Z, Y, X, W), 4, arg, loc))) return false; - list_add_tail(params->instrs, &swizzle->entry); + hlsl_block_add_instr(params->instrs, swizzle); arg = swizzle; } @@ -3458,7 +3615,7 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, if (!(ret = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, c, loc))) return false; - if (ctx->profile->major_version >= 4) + if (shader_profile_version_ge(ctx, 4, 0)) return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_TRUNC, ret, loc); return true; @@ -3482,10 +3639,15 @@ intrinsic_functions[] = {"asfloat", 1, true, intrinsic_asfloat}, {"asuint", -1, true, intrinsic_asuint}, {"clamp", 3, true, intrinsic_clamp}, + {"clip", 1, true, intrinsic_clip}, {"cos", 1, true, intrinsic_cos}, {"cross", 2, true, intrinsic_cross}, {"ddx", 1, true, intrinsic_ddx}, + {"ddx_coarse", 1, true, intrinsic_ddx_coarse}, + {"ddx_fine", 1, true, intrinsic_ddx_fine}, {"ddy", 1, true, intrinsic_ddy}, + {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, + {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, @@ -3549,7 +3711,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, { struct hlsl_ir_node *cast; - if (!(cast = add_cast(ctx, args->instrs, arg, param->data_type, &arg->loc))) + if (!(cast = add_cast(ctx, block_to_list(args->instrs), arg, param->data_type, &arg->loc))) goto fail; args->args[i] = cast; arg = cast; @@ -3561,13 +3723,13 @@ static struct list *add_call(struct hlsl_ctx *ctx, 
const char *name, if (!(store = hlsl_new_simple_store(ctx, param, arg))) goto fail; - list_add_tail(args->instrs, &store->entry); + hlsl_block_add_instr(args->instrs, store); } } if (!(call = hlsl_new_call(ctx, decl, loc))) goto fail; - list_add_tail(args->instrs, &call->entry); + hlsl_block_add_instr(args->instrs, call); for (i = 0; i < decl->parameters.count; ++i) { @@ -3584,9 +3746,9 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (!(load = hlsl_new_var_load(ctx, param, &arg->loc))) goto fail; - list_add_tail(args->instrs, &load->node.entry); + hlsl_block_add_instr(args->instrs, &load->node); - if (!add_assignment(ctx, args->instrs, arg, ASSIGN_OP_ASSIGN, &load->node)) + if (!add_assignment(ctx, block_to_list(args->instrs), arg, ASSIGN_OP_ASSIGN, &load->node)) goto fail; } } @@ -3597,7 +3759,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (!(load = hlsl_new_var_load(ctx, decl->return_var, loc))) goto fail; - list_add_tail(args->instrs, &load->node.entry); + hlsl_block_add_instr(args->instrs, &load->node); } else { @@ -3606,7 +3768,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_VOID, operands, ctx->builtin_types.Void, loc))) goto fail; - list_add_tail(args->instrs, &expr->entry); + hlsl_block_add_instr(args->instrs, expr); } } else if ((intrinsic = bsearch(name, intrinsic_functions, ARRAY_SIZE(intrinsic_functions), @@ -3655,7 +3817,7 @@ static struct list *add_call(struct hlsl_ctx *ctx, const char *name, goto fail; } vkd3d_free(args->args); - return args->instrs; + return block_to_list(args->instrs); fail: free_parse_initializer(args); @@ -3692,10 +3854,10 @@ static struct list *add_constructor(struct hlsl_ctx *ctx, struct hlsl_type *type if (!(load = hlsl_new_var_load(ctx, var, loc))) return NULL; - list_add_tail(params->instrs, &load->node.entry); + hlsl_block_add_instr(params->instrs, &load->node); vkd3d_free(params->args); - return params->instrs; + return block_to_list(params->instrs); } static unsigned int hlsl_offset_dim_count(enum hlsl_sampler_dim dim) @@ -4272,6 +4434,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type char *name; DWORD modifiers; struct hlsl_ir_node *instr; + struct hlsl_block *block; struct list *list; struct parse_fields fields; struct parse_function function; @@ -4404,33 +4567,23 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type bitand_expr %type bitor_expr %type bitxor_expr -%type compound_statement %type conditional_expr %type declaration %type declaration_statement -%type discard_statement %type equality_expr -%type expr -%type expr_optional -%type expr_statement %type initializer_expr -%type jump_statement %type logicand_expr %type logicor_expr -%type loop_statement %type mul_expr %type postfix_expr %type primary_expr %type relational_expr -%type selection_statement %type shift_expr -%type statement -%type statement_list -%type struct_declaration +%type struct_declaration_without_vars %type type_specs %type unary_expr %type variables_def -%type variables_def_optional +%type variables_def_typed %token VAR_IDENTIFIER %token NEW_IDENTIFIER @@ -4446,6 +4599,16 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type attribute_list %type attribute_list_optional +%type compound_statement +%type expr +%type expr_optional +%type expr_statement +%type jump_statement +%type loop_statement +%type selection_statement +%type statement +%type 
statement_list + %type boolean %type buffer_type @@ -4493,6 +4656,7 @@ static void validate_texture_format_type(struct hlsl_ctx *ctx, struct hlsl_type %type type_spec %type variable_decl %type variable_def +%type variable_def_typed %% @@ -4561,25 +4725,19 @@ preproc_directive: } } -struct_declaration: - var_modifiers struct_spec variables_def_optional ';' +struct_declaration_without_vars: + var_modifiers struct_spec ';' { - struct hlsl_type *type; - unsigned int modifiers = $1; + if (!$2->name) + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Anonymous struct type must declare a variable."); - if (!$3) - { - if (!$2->name) - hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Anonymous struct type must declare a variable."); - if (modifiers) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, - "Modifiers are not allowed on struct type declarations."); - } + if ($1) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Modifiers are not allowed on struct type declarations."); - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + if (!($$ = make_empty_list(ctx))) YYABORT; - $$ = declare_vars(ctx, type, modifiers, &@1, $3); } struct_spec: @@ -4702,7 +4860,7 @@ attribute: } $$->name = $2; list_init(&$$->instrs); - list_move_tail(&$$->instrs, $4.instrs); + list_move_tail(&$$->instrs, &$4.instrs->instrs); vkd3d_free($4.instrs); $$->loc = @$; $$->args_count = $4.args_count; @@ -4758,15 +4916,15 @@ func_declaration: "Function \"%s\" is already defined.", decl->func->name); hlsl_note(ctx, &decl->loc, VKD3D_SHADER_LOG_ERROR, "\"%s\" was previously defined here.", decl->func->name); - hlsl_free_instr_list($2); + destroy_block($2); } else { size_t i; decl->has_body = true; - list_move_tail(&decl->body.instrs, $2); - vkd3d_free($2); + hlsl_block_add_block(&decl->body, $2); + destroy_block($2); /* Semantics are taken from whichever definition has a body. 
* We can't just replace the hlsl_ir_var pointers, though: if @@ -4943,7 +5101,7 @@ func_prototype: compound_statement: '{' '}' { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; } | '{' scope_start statement_list '}' @@ -5261,7 +5419,12 @@ type_no_void: { validate_texture_format_type(ctx, $3, &@3); - /* TODO: unspecified sample count is not allowed for all targets */ + if (shader_profile_version_lt(ctx, 4, 1)) + { + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); + } + $$ = hlsl_new_texture_type(ctx, $1, $3, 0); } | texture_ms_type '<' type ',' shift_expr '>' @@ -5325,7 +5488,7 @@ type_no_void: $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); if ($$->is_minimum_precision) { - if (ctx->profile->major_version < 4) + if (shader_profile_version_lt(ctx, 4, 0)) { hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Target profile doesn't support minimum-precision types."); @@ -5354,7 +5517,7 @@ type: declaration_statement: declaration - | struct_declaration + | struct_declaration_without_vars | typedef { if (!($$ = make_empty_list(ctx))) @@ -5416,23 +5579,12 @@ type_spec: } declaration: - var_modifiers type variables_def ';' + variables_def_typed ';' { - struct hlsl_type *type; - unsigned int modifiers = $1; - - if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + if (!($$ = initialize_vars(ctx, $1))) YYABORT; - $$ = declare_vars(ctx, type, modifiers, &@1, $3); } -variables_def_optional: - %empty - { - $$ = NULL; - } - | variables_def - variables_def: variable_def { @@ -5446,6 +5598,33 @@ variables_def: list_add_tail($$, &$3->entry); } +variables_def_typed: + variable_def_typed + { + if (!($$ = make_empty_list(ctx))) + YYABORT; + list_add_head($$, &$1->entry); + + declare_var(ctx, $1); + } + | variables_def_typed ',' variable_def + { + struct parse_variable_def *head_def; + + assert(!list_empty($1)); + head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); + + assert(head_def->basic_type); + $3->basic_type = head_def->basic_type; + $3->modifiers = head_def->modifiers; + $3->modifiers_loc = head_def->modifiers_loc; + + declare_var(ctx, $3); + + $$ = $1; + list_add_tail($$, &$3->entry); + } + variable_decl: any_identifier arrays colon_attribute { @@ -5461,7 +5640,7 @@ state: any_identifier '=' expr ';' { vkd3d_free($1); - hlsl_free_instr_list($3); + destroy_block($3); } state_block_start: @@ -5487,6 +5666,38 @@ variable_def: ctx->in_state_block = 0; } +variable_def_typed: + var_modifiers struct_spec variable_def + { + unsigned int modifiers = $1; + struct hlsl_type *type; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + + check_invalid_in_out_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; + $$->modifiers = modifiers; + $$->modifiers_loc = @1; + } + | var_modifiers type variable_def + { + unsigned int modifiers = $1; + struct hlsl_type *type; + + if (!(type = apply_type_modifiers(ctx, $2, &modifiers, true, &@1))) + YYABORT; + + check_invalid_in_out_modifiers(ctx, modifiers, &@1); + + $$ = $3; + $$->basic_type = type; + $$->modifiers = modifiers; + $$->modifiers_loc = @1; + } + arrays: %empty { @@ -5495,17 +5706,12 @@ arrays: } | '[' expr ']' arrays { - struct hlsl_block block; uint32_t *new_array; unsigned int size; - hlsl_clone_block(ctx, &block, &ctx->static_initializers); - list_move_tail(&block.instrs, $2); + size = 
evaluate_static_expression_as_uint(ctx, $2, &@2); - size = evaluate_static_expression_as_uint(ctx, &block, &@2); - - hlsl_block_cleanup(&block); - vkd3d_free($2); + destroy_block($2); $$ = $4; @@ -5622,7 +5828,7 @@ complex_initializer: YYABORT; } $$.args[0] = node_from_list($1); - $$.instrs = $1; + $$.instrs = list_to_block($1); $$.braces = false; } | '{' complex_initializer_list '}' @@ -5653,7 +5859,7 @@ complex_initializer_list: $$.args = new_args; for (i = 0; i < $3.args_count; ++i) $$.args[$$.args_count++] = $3.args[i]; - list_move_tail($$.instrs, $3.instrs); + hlsl_block_add_block($$.instrs, $3.instrs); free_parse_initializer(&$3); } @@ -5670,7 +5876,7 @@ initializer_expr_list: YYABORT; } $$.args[0] = node_from_list($1); - $$.instrs = $1; + $$.instrs = list_to_block($1); $$.braces = false; } | initializer_expr_list ',' initializer_expr @@ -5686,7 +5892,7 @@ initializer_expr_list: } $$.args = new_args; $$.args[$$.args_count++] = node_from_list($3); - list_move_tail($$.instrs, $3); + list_move_tail(&$$.instrs->instrs, $3); vkd3d_free($3); } @@ -5705,15 +5911,17 @@ statement_list: | statement_list statement { $$ = $1; - list_move_tail($$, $2); - vkd3d_free($2); + hlsl_block_add_block($$, $2); + destroy_block($2); } statement: declaration_statement + { + $$ = list_to_block($1); + } | expr_statement | compound_statement - | discard_statement | jump_statement | selection_statement | loop_statement @@ -5721,47 +5929,47 @@ statement: jump_statement: KW_RETURN expr ';' { - if (!add_return(ctx, $2, node_from_list($2), &@1)) - YYABORT; $$ = $2; + if (!add_return(ctx, $$, node_from_block($$), &@1)) + YYABORT; } | KW_RETURN ';' { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; if (!add_return(ctx, $$, NULL, &@1)) YYABORT; } - -discard_statement: - KW_DISCARD ';' + | KW_DISCARD ';' { - struct hlsl_ir_node *discard; + struct hlsl_ir_node *discard, *c; - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; - if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD, &@1))) + + if (!(c = hlsl_new_uint_constant(ctx, ~0u, &@1))) + return false; + hlsl_block_add_instr($$, c); + + if (!(discard = hlsl_new_jump(ctx, HLSL_IR_JUMP_DISCARD_NZ, c, &@1))) return false; - list_add_tail($$, &discard->entry); + hlsl_block_add_instr($$, discard); } selection_statement: KW_IF '(' expr ')' if_body { - struct hlsl_ir_node *condition = node_from_list($3); - struct hlsl_block then_block, else_block; + struct hlsl_ir_node *condition = node_from_block($3); struct hlsl_ir_node *instr; - hlsl_block_init(&then_block); - list_move_tail(&then_block.instrs, $5.then_block); - hlsl_block_init(&else_block); - if ($5.else_block) - list_move_tail(&else_block.instrs, $5.else_block); - vkd3d_free($5.then_block); - vkd3d_free($5.else_block); - - if (!(instr = hlsl_new_if(ctx, condition, &then_block, &else_block, &@1))) + if (!(instr = hlsl_new_if(ctx, condition, $5.then_block, $5.else_block, &@1))) + { + destroy_block($5.then_block); + destroy_block($5.else_block); YYABORT; + } + destroy_block($5.then_block); + destroy_block($5.else_block); if (condition->data_type->dimx > 1 || condition->data_type->dimy > 1) { struct vkd3d_string_buffer *string; @@ -5772,7 +5980,7 @@ selection_statement: hlsl_release_string_buffer(ctx, string); } $$ = $3; - list_add_tail($$, &instr->entry); + hlsl_block_add_instr($$, instr); } if_body: @@ -5803,14 +6011,14 @@ loop_statement: } | attribute_list_optional KW_FOR '(' scope_start declaration expr_statement expr_optional ')' statement { - $$ = 
create_loop(ctx, LOOP_FOR, &$1, $5, $6, $7, $9, &@2); + $$ = create_loop(ctx, LOOP_FOR, &$1, list_to_block($5), $6, $7, $9, &@2); hlsl_pop_scope(ctx); } expr_optional: %empty { - if (!($$ = make_empty_list(ctx))) + if (!($$ = make_empty_block(ctx))) YYABORT; } | expr @@ -5826,7 +6034,7 @@ func_arguments: { $$.args = NULL; $$.args_count = 0; - if (!($$.instrs = make_empty_list(ctx))) + if (!($$.instrs = make_empty_block(ctx))) YYABORT; $$.braces = false; } @@ -5880,7 +6088,7 @@ primary_expr: } | '(' expr ')' { - $$ = $2; + $$ = block_to_list($2); } | var_identifier '(' func_arguments ')' { @@ -5974,10 +6182,10 @@ postfix_expr: } | postfix_expr '[' expr ']' { - struct hlsl_ir_node *array = node_from_list($1), *index = node_from_list($3); + struct hlsl_ir_node *array = node_from_list($1), *index = node_from_block($3); - list_move_head($1, $3); - vkd3d_free($3); + list_move_head($1, &$3->instrs); + destroy_block($3); if (!add_array_access(ctx, $1, array, index, &@2)) { @@ -6027,7 +6235,7 @@ postfix_expr: { struct hlsl_ir_node *object = node_from_list($1); - list_move_tail($1, $5.instrs); + list_move_tail($1, &$5.instrs->instrs); vkd3d_free($5.instrs); if (!add_method_call(ctx, $1, object, $3, &$5, &@3)) @@ -6066,7 +6274,7 @@ unary_expr: } | '-' unary_expr { - add_unary_arithmetic_expr(ctx, $2, HLSL_OP1_NEG, node_from_list($2), &@1); + add_unary_arithmetic_expr(ctx, list_to_block($2), HLSL_OP1_NEG, node_from_list($2), &@1); $$ = $2; } | '~' unary_expr @@ -6130,119 +6338,118 @@ mul_expr: unary_expr | mul_expr '*' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MUL, &@2); } | mul_expr '/' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_DIV, &@2); } | mul_expr '%' unary_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_MOD, &@2); } add_expr: mul_expr | add_expr '+' mul_expr { - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } | add_expr '-' mul_expr { struct hlsl_ir_node *neg; - if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, node_from_list($3), &@2))) + if (!(neg = add_unary_arithmetic_expr(ctx, list_to_block($3), HLSL_OP1_NEG, node_from_list($3), &@2))) YYABORT; - list_add_tail($3, &neg->entry); - $$ = add_binary_arithmetic_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_ADD, &@2); } shift_expr: add_expr | shift_expr OP_LEFTSHIFT add_expr { - $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LSHIFT, &@2); } | shift_expr OP_RIGHTSHIFT add_expr { - $$ = add_binary_shift_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_RSHIFT, &@2); } relational_expr: shift_expr | relational_expr '<' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LESS, &@2); } | relational_expr '>' shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); + $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_LESS, &@2); } | relational_expr OP_LE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $3, $1, HLSL_OP2_GEQUAL, &@2); } | 
relational_expr OP_GE shift_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_GEQUAL, &@2); } equality_expr: relational_expr | equality_expr OP_EQ relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_EQUAL, &@2); } | equality_expr OP_NE relational_expr { - $$ = add_binary_comparison_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_NEQUAL, &@2); } bitand_expr: equality_expr | bitand_expr '&' equality_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_AND, &@2); } bitxor_expr: bitand_expr | bitxor_expr '^' bitand_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_XOR, &@2); } bitor_expr: bitxor_expr | bitor_expr '|' bitxor_expr { - $$ = add_binary_bitwise_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_BIT_OR, &@2); } logicand_expr: bitor_expr | logicand_expr OP_AND bitor_expr { - $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_AND, &@2); } logicor_expr: logicand_expr | logicor_expr OP_OR logicand_expr { - $$ = add_binary_logical_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); + $$ = add_binary_expr_merge(ctx, $1, $3, HLSL_OP2_LOGIC_OR, &@2); } conditional_expr: logicor_expr | logicor_expr '?' expr ':' assignment_expr { - struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_list($3), *second = node_from_list($5); + struct hlsl_ir_node *cond = node_from_list($1), *first = node_from_block($3), *second = node_from_list($5); struct hlsl_type *common_type; - list_move_tail($1, $3); + list_move_tail($1, &$3->instrs); list_move_tail($1, $5); - vkd3d_free($3); + destroy_block($3); vkd3d_free($5); if (!(common_type = get_common_numeric_type(ctx, first, second, &@3))) @@ -6326,9 +6533,12 @@ assign_op: expr: assignment_expr + { + $$ = list_to_block($1); + } | expr ',' assignment_expr { $$ = $1; - list_move_tail($$, $3); + list_move_tail(&$$->instrs, $3); vkd3d_free($3); } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 765b1907426..8927e291183 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -666,7 +666,7 @@ static void insert_early_return_break(struct hlsl_ctx *ctx, return; list_add_after(&cf_instr->entry, &load->node.entry); - if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, &cf_instr->loc))) + if (!(jump = hlsl_new_jump(ctx, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc))) return; hlsl_block_add_instr(&then_block, jump); @@ -1889,7 +1889,7 @@ static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr if (rhs->type != HLSL_IR_LOAD) { - hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.\n"); + hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type."); return false; } @@ -2584,6 +2584,61 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; } +static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; + static const struct hlsl_constant_value zero_value; + struct 
hlsl_type *arg_type, *cmp_type; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_jump *jump; + unsigned int i, count; + struct list instrs; + + if (instr->type != HLSL_IR_JUMP) + return false; + jump = hlsl_ir_jump(instr); + if (jump->type != HLSL_IR_JUMP_DISCARD_NEG) + return false; + + list_init(&instrs); + + arg_type = jump->condition.node->data_type; + if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc))) + return false; + list_add_tail(&instrs, &zero->entry); + + operands[0] = jump->condition.node; + operands[1] = zero; + cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL, arg_type->dimx, arg_type->dimy); + if (!(cmp = hlsl_new_expr(ctx, HLSL_OP2_LESS, operands, cmp_type, &instr->loc))) + return false; + list_add_tail(&instrs, &cmp->entry); + + if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc))) + return false; + list_add_tail(&instrs, &bool_false->entry); + + or = bool_false; + + count = hlsl_type_component_count(cmp_type); + for (i = 0; i < count; ++i) + { + if (!(load = hlsl_add_load_component(ctx, &instrs, cmp, i, &instr->loc))) + return false; + + if (!(or = hlsl_new_binary_expr(ctx, HLSL_OP2_LOGIC_OR, or, load))) + return NULL; + list_add_tail(&instrs, &or->entry); + } + + list_move_tail(&instr->entry, &instrs); + hlsl_src_remove(&jump->condition); + hlsl_src_from_node(&jump->condition, or); + jump->type = HLSL_IR_JUMP_DISCARD_NZ; + + return true; +} + static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { switch (instr->type) @@ -2848,8 +2903,15 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop index->idx.node->last_read = last_read; break; } - case HLSL_IR_CONSTANT: case HLSL_IR_JUMP: + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); + + if (jump->condition.node) + jump->condition.node->last_read = last_read; + break; + } + case HLSL_IR_CONSTANT: break; } } @@ -3192,10 +3254,33 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, } } +static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f) +{ + struct hlsl_constant_defs *defs = &ctx->constant_defs; + struct hlsl_constant_register *reg; + size_t i; + + for (i = 0; i < defs->count; ++i) + { + reg = &defs->regs[i]; + if (reg->index == (component_index / 4)) + { + reg->value.f[component_index % 4] = f; + return; + } + } + + if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs))) + return; + reg = &defs->regs[defs->count++]; + memset(reg, 0, sizeof(*reg)); + reg->index = component_index / 4; + reg->value.f[component_index % 4] = f; +} + static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block, struct register_allocator *allocator) { - struct hlsl_constant_defs *defs = &ctx->constant_defs; struct hlsl_ir_node *instr; LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) @@ -3206,66 +3291,52 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, { struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); const struct hlsl_type *type = instr->data_type; - unsigned int x, y, i, writemask, end_reg; - unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int x, i; constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - if 
(!hlsl_array_reserve(ctx, (void **)&defs->values, &defs->size, - constant->reg.id + reg_size / 4, sizeof(*defs->values))) - return; - end_reg = constant->reg.id + reg_size / 4; - if (end_reg > defs->count) - { - memset(&defs->values[defs->count], 0, sizeof(*defs->values) * (end_reg - defs->count)); - defs->count = end_reg; - } - assert(type->class <= HLSL_CLASS_LAST_NUMERIC); + assert(type->dimy == 1); + assert(constant->reg.writemask); - if (!(writemask = constant->reg.writemask)) - writemask = (1u << type->dimx) - 1; - - for (y = 0; y < type->dimy; ++y) + for (x = 0, i = 0; x < 4; ++x) { - for (x = 0, i = 0; x < 4; ++x) + const union hlsl_constant_value_component *value; + float f; + + if (!(constant->reg.writemask & (1u << x))) + continue; + value = &constant->value.u[i++]; + + switch (type->base_type) { - const union hlsl_constant_value_component *value; - float f; - - if (!(writemask & (1u << x))) - continue; - value = &constant->value.u[i++]; - - switch (type->base_type) - { - case HLSL_TYPE_BOOL: - f = !!value->u; - break; - - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - f = value->f; - break; - - case HLSL_TYPE_INT: - f = value->i; - break; - - case HLSL_TYPE_UINT: - f = value->u; - break; - - case HLSL_TYPE_DOUBLE: - FIXME("Double constant.\n"); - return; - - default: - vkd3d_unreachable(); - } - defs->values[constant->reg.id + y].f[x] = f; + case HLSL_TYPE_BOOL: + f = !!value->u; + break; + + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + f = value->f; + break; + + case HLSL_TYPE_INT: + f = value->i; + break; + + case HLSL_TYPE_UINT: + f = value->u; + break; + + case HLSL_TYPE_DOUBLE: + FIXME("Double constant.\n"); + return; + + default: + vkd3d_unreachable(); } + + record_constant(ctx, constant->reg.id * 4 + x, f); } break; @@ -3297,8 +3368,6 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi struct register_allocator allocator = {0}; struct hlsl_ir_var *var; - allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (var->is_uniform && var->last_read) @@ -3315,6 +3384,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi } } + allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); + vkd3d_free(allocator.allocations); } @@ -4062,6 +4133,10 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE, "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name); + if (profile->major_version >= 4) + { + hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); + } hlsl_transform_ir(ctx, lower_broadcasts, body, NULL); while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL)); do diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 301113c8477..01c438ae212 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -80,7 +80,7 @@ static bool fold_cast(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return false; } - for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (src->node.data_type->base_type) { @@ -160,7 +160,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, assert(type == src->node.data_type->base_type); - for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { @@ -186,6 
+186,51 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, return true; } +static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src, const struct vkd3d_shader_location *loc) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src->node.data_type->base_type); + + for (k = 0; k < 4; ++k) + { + switch (type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + if (ctx->profile->major_version >= 4 && src->value.u[k].f == 0.0f) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } + dst->u[k].f = 1.0f / src->value.u[k].f; + if (ctx->profile->major_version < 4 && !isfinite(dst->u[k].f)) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, + "Infinities and NaNs are not allowed by the shader model."); + } + break; + + case HLSL_TYPE_DOUBLE: + if (src->value.u[k].d == 0.0) + { + hlsl_warning(ctx, loc, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO, + "Floating point division by zero."); + } + dst->u[k].d = 1.0 / src->value.u[k].d; + break; + + default: + FIXME("Fold 'rcp' for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + + return true; +} + static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { @@ -195,7 +240,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { @@ -223,7 +268,7 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; } -static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { enum hlsl_base_type type = dst_type->base_type; @@ -232,64 +277,73 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons assert(type == src1->node.data_type->base_type); assert(type == src2->node.data_type->base_type); - for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { switch (type) { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; - break; - - case HLSL_TYPE_DOUBLE: - dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; - break; - case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; break; default: - FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold bit/logic and for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { + enum hlsl_base_type type = dst_type->base_type; unsigned int k; - assert(dst_type->base_type == HLSL_TYPE_BOOL); - 
assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); - for (k = 0; k < 4; ++k) + for (k = 0; k < dst_type->dimx; ++k) { - switch (src1->node.data_type->base_type) + switch (type) { - case HLSL_TYPE_FLOAT: - case HLSL_TYPE_HALF: - dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; break; - case HLSL_TYPE_DOUBLE: - dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; - break; + default: + FIXME("Fold bit/logic or for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; + } + } + return true; +} + +static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + enum hlsl_base_type type = dst_type->base_type; + unsigned int k; + + assert(type == src1->node.data_type->base_type); + assert(type == src2->node.data_type->base_type); + for (k = 0; k < dst_type->dimx; ++k) + { + switch (type) + { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - case HLSL_TYPE_BOOL: - dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; + dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; break; default: - vkd3d_unreachable(); + FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); + return false; } - - dst->u[k].u *= ~0u; } return true; } @@ -363,45 +417,116 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; } -static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, - const struct vkd3d_shader_location *loc) +static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); for (k = 0; k < dst_type->dimx; ++k) { - switch (type) + switch (src1->node.data_type->base_type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f == src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d == src2->value.u[k].d; + break; + case HLSL_TYPE_INT: - if (src2->value.u[k].i == 0) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } - if (src1->value.u[k].i == INT_MIN && src2->value.u[k].i == -1) - dst->u[k].i = 0; - else - dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u == src2->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; + } + return true; +} + +static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + unsigned int k; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 
0; k < dst_type->dimx; ++k) + { + switch (src1->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f >= src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d >= src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + dst->u[k].u = src1->value.u[k].i >= src2->value.u[k].i; break; case HLSL_TYPE_UINT: - if (src2->value.u[k].u == 0) - { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); - return false; - } - dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u >= src2->value.u[k].u; break; default: - FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; + vkd3d_unreachable(); } + + dst->u[k].u *= ~0u; + } + return true; +} + +static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +{ + unsigned int k; + + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); + + for (k = 0; k < dst_type->dimx; ++k) + { + switch (src1->node.data_type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f < src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d < src2->value.u[k].d; + break; + + case HLSL_TYPE_INT: + dst->u[k].u = src1->value.u[k].i < src2->value.u[k].i; + break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u < src2->value.u[k].u; + break; + + default: + vkd3d_unreachable(); + } + + dst->u[k].u *= ~0u; } return true; } @@ -419,6 +544,15 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fmaxf(src1->value.u[k].f, src2->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fmax(src1->value.u[k].d, src2->value.u[k].d); + break; + case HLSL_TYPE_INT: dst->u[k].i = max(src1->value.u[k].i, src2->value.u[k].i); break; @@ -448,6 +582,15 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = fminf(src1->value.u[k].f, src2->value.u[k].f); + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = fmin(src1->value.u[k].d, src2->value.u[k].d); + break; + case HLSL_TYPE_INT: dst->u[k].i = min(src1->value.u[k].i, src2->value.u[k].i); break; @@ -464,8 +607,9 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons return true; } -static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, - const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) +static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, + const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2, + const struct vkd3d_shader_location *loc) { enum hlsl_base_type type = dst_type->base_type; unsigned int k; @@ -478,19 +622,35 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, switch (type) { case HLSL_TYPE_INT: + if (src2->value.u[k].i == 0) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); + return false; + } + if (src1->value.u[k].i == INT_MIN && 
src2->value.u[k].i == -1) + dst->u[k].i = 0; + else + dst->u[k].i = src1->value.u[k].i % src2->value.u[k].i; + break; + case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u ^ src2->value.u[k].u; + if (src2->value.u[k].u == 0) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_DIVISION_BY_ZERO, "Division by zero."); + return false; + } + dst->u[k].u = src1->value.u[k].u % src2->value.u[k].u; break; default: - FIXME("Fold bit xor for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold modulus for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { enum hlsl_base_type type = dst_type->base_type; @@ -503,41 +663,60 @@ static bool fold_bit_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, { switch (type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].f = src1->value.u[k].f * src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].d = src1->value.u[k].d * src2->value.u[k].d; + break; + case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u & src2->value.u[k].u; + dst->u[k].u = src1->value.u[k].u * src2->value.u[k].u; break; default: - FIXME("Fold bit and for type %s.\n", debug_hlsl_type(ctx, dst_type)); + FIXME("Fold multiplication for type %s.\n", debug_hlsl_type(ctx, dst_type)); return false; } } return true; } -static bool fold_bit_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, +static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const struct hlsl_type *dst_type, const struct hlsl_ir_constant *src1, const struct hlsl_ir_constant *src2) { - enum hlsl_base_type type = dst_type->base_type; unsigned int k; - assert(type == src1->node.data_type->base_type); - assert(type == src2->node.data_type->base_type); + assert(dst_type->base_type == HLSL_TYPE_BOOL); + assert(src1->node.data_type->base_type == src2->node.data_type->base_type); for (k = 0; k < dst_type->dimx; ++k) { - switch (type) + switch (src1->node.data_type->base_type) { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_HALF: + dst->u[k].u = src1->value.u[k].f != src2->value.u[k].f; + break; + + case HLSL_TYPE_DOUBLE: + dst->u[k].u = src1->value.u[k].d != src2->value.u[k].d; + break; + case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - dst->u[k].u = src1->value.u[k].u | src2->value.u[k].u; + case HLSL_TYPE_BOOL: + dst->u[k].u = src1->value.u[k].u != src2->value.u[k].u; break; default: - FIXME("Fold bit or for type %s.\n", debug_hlsl_type(ctx, dst_type)); - return false; + vkd3d_unreachable(); } + + dst->u[k].u *= ~0u; } return true; } @@ -587,24 +766,42 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_neg(ctx, &res, instr->data_type, arg1); break; + case HLSL_OP1_RCP: + success = fold_rcp(ctx, &res, instr->data_type, arg1, &instr->loc); + break; + case HLSL_OP2_ADD: success = fold_add(ctx, &res, instr->data_type, arg1, arg2); break; - case HLSL_OP2_MUL: - success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_BIT_AND: + case HLSL_OP2_LOGIC_AND: + success = fold_and(ctx, &res, instr->data_type, arg1, arg2); break; - case HLSL_OP2_NEQUAL: - success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_BIT_OR: + 
case HLSL_OP2_LOGIC_OR: + success = fold_or(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_BIT_XOR: + success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_DIV: success = fold_div(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; - case HLSL_OP2_MOD: - success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); + case HLSL_OP2_EQUAL: + success = fold_equal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_GEQUAL: + success = fold_gequal(ctx, &res, instr->data_type, arg1, arg2); + break; + + case HLSL_OP2_LESS: + success = fold_less(ctx, &res, instr->data_type, arg1, arg2); break; case HLSL_OP2_MAX: @@ -615,16 +812,16 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, success = fold_min(ctx, &res, instr->data_type, arg1, arg2); break; - case HLSL_OP2_BIT_XOR: - success = fold_bit_xor(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_MOD: + success = fold_mod(ctx, &res, instr->data_type, arg1, arg2, &instr->loc); break; - case HLSL_OP2_BIT_AND: - success = fold_bit_and(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_MUL: + success = fold_mul(ctx, &res, instr->data_type, arg1, arg2); break; - case HLSL_OP2_BIT_OR: - success = fold_bit_or(ctx, &res, instr->data_type, arg1, arg2); + case HLSL_OP2_NEQUAL: + success = fold_nequal(ctx, &res, instr->data_type, arg1, arg2); break; default: diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 3542b5fac51..5535a6503d6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -3217,7 +3217,7 @@ static bool spirv_compiler_get_register_info(const struct spirv_compiler *compil { assert(reg->idx[0].offset < compiler->temp_count); register_info->id = compiler->temp_id + reg->idx[0].offset; - register_info->storage_class = SpvStorageClassFunction; + register_info->storage_class = SpvStorageClassPrivate; register_info->descriptor_array = NULL; register_info->member_idx = 0; register_info->component_type = VKD3D_SHADER_COMPONENT_FLOAT; @@ -5258,8 +5258,7 @@ static void spirv_compiler_emit_dcl_global_flags(struct spirv_compiler *compiler WARN("Unhandled global flags %#x.\n", flags); } -static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, - const struct vkd3d_shader_instruction *instruction) +static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t count) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; size_t function_location; @@ -5270,11 +5269,11 @@ static void spirv_compiler_emit_dcl_temps(struct spirv_compiler *compiler, vkd3d_spirv_begin_function_stream_insertion(builder, function_location); assert(!compiler->temp_count); - compiler->temp_count = instruction->declaration.count; + compiler->temp_count = count; for (i = 0; i < compiler->temp_count; ++i) { - id = spirv_compiler_emit_variable(compiler, &builder->function_stream, - SpvStorageClassFunction, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); + id = spirv_compiler_emit_variable(compiler, &builder->global_stream, + SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (!i) compiler->temp_id = id; assert(id == compiler->temp_id + i); @@ -6236,9 +6235,6 @@ static void spirv_compiler_leave_shader_phase(struct spirv_compiler *compiler) vkd3d_spirv_build_op_function_end(builder); - compiler->temp_id = 0; - compiler->temp_count = 0; - if (is_in_control_point_phase(compiler)) { if 
(compiler->epilogue_function_id) @@ -9103,9 +9099,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_GLOBAL_FLAGS: spirv_compiler_emit_dcl_global_flags(compiler, instruction); break; - case VKD3DSIH_DCL_TEMPS: - spirv_compiler_emit_dcl_temps(compiler, instruction); - break; case VKD3DSIH_DCL_INDEXABLE_TEMP: spirv_compiler_emit_dcl_indexable_temp(compiler, instruction); break; @@ -9426,6 +9419,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, spirv_compiler_emit_cut_stream(compiler, instruction); break; case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: + case VKD3DSIH_DCL_TEMPS: case VKD3DSIH_HS_DECLS: case VKD3DSIH_NOP: /* nothing to do */ @@ -9448,6 +9442,9 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, enum vkd3d_result result = VKD3D_OK; unsigned int i; + if (parser->shader_desc.temp_count) + spirv_compiler_emit_temps(compiler, parser->shader_desc.temp_count); + compiler->location.column = 0; compiler->location.line = 1; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index d066b13ee4e..290fdcb3f62 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -989,6 +989,8 @@ static void shader_sm4_read_declaration_count(struct vkd3d_shader_instruction *i uint32_t opcode_token, const uint32_t *tokens, unsigned int token_count, struct vkd3d_shader_sm4_parser *priv) { ins->declaration.count = *tokens; + if (opcode == VKD3D_SM4_OP_DCL_TEMPS) + priv->p.shader_desc.temp_count = max(priv->p.shader_desc.temp_count, *tokens); } static void shader_sm4_read_declaration_dst(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -2442,6 +2444,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi } shader_desc = &sm4->p.shader_desc; + shader_desc->is_dxil = false; if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, shader_desc)) < 0) { @@ -4369,11 +4372,31 @@ static void write_sm4_expr(struct hlsl_ctx *ctx, write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); break; + case HLSL_OP1_DSX_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSX_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); + break; + case HLSL_OP1_DSY: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); break; + case HLSL_OP1_DSY_COARSE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); + break; + + case HLSL_OP1_DSY_FINE: + assert(type_is_float(dst_type)); + write_sm4_unary_op(buffer, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); + break; + case HLSL_OP1_EXP2: assert(type_is_float(dst_type)); write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); @@ -4780,19 +4803,13 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, instr.opcode = VKD3D_SM4_OP_BREAK; break; - case HLSL_IR_JUMP_DISCARD: + case HLSL_IR_JUMP_DISCARD_NZ: { - struct sm4_register *reg = &instr.srcs[0].reg; - instr.opcode = VKD3D_SM4_OP_DISCARD | VKD3D_SM4_CONDITIONAL_NZ; memset(&instr.srcs[0], 0, sizeof(*instr.srcs)); - instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE; instr.src_count = 1; - reg->type = VKD3D_SM4_RT_IMMCONST; - reg->dim = VKD3D_SM4_DIMENSION_SCALAR; - 
reg->immconst_uint[0] = ~0u; - + sm4_src_from_node(&instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL); break; } @@ -4800,7 +4817,7 @@ static void write_sm4_jump(struct hlsl_ctx *ctx, vkd3d_unreachable(); default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type)); + hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); return; } @@ -5016,7 +5033,7 @@ static void write_sm4_resource_store(struct hlsl_ctx *ctx, if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER) { - hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.\n"); + hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented."); return; } diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 343fdb2252e..33d8c60e59a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -415,6 +415,8 @@ static const char *shader_get_source_type_suffix(enum vkd3d_shader_source_type t return "hlsl"; case VKD3D_SHADER_SOURCE_D3D_BYTECODE: return "d3dbc"; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + return "dxil"; default: FIXME("Unhandled source type %#x.\n", type); return "bin"; @@ -440,6 +442,18 @@ void vkd3d_shader_dump_shader(enum vkd3d_shader_source_type source_type, shader_get_source_type_suffix(source_type), shader->code, shader->size); } +static void init_scan_signature_info(const struct vkd3d_shader_compile_info *info) +{ + struct vkd3d_shader_scan_signature_info *signature_info; + + if ((signature_info = vkd3d_find_struct(info->next, SCAN_SIGNATURE_INFO))) + { + memset(&signature_info->input, 0, sizeof(signature_info->input)); + memset(&signature_info->output, 0, sizeof(signature_info->output)); + memset(&signature_info->patch_constant, 0, sizeof(signature_info->patch_constant)); + } +} + bool vkd3d_shader_parser_init(struct vkd3d_shader_parser *parser, struct vkd3d_shader_message_context *message_context, const char *source_name, const struct vkd3d_shader_version *version, const struct vkd3d_shader_parser_ops *ops, @@ -526,6 +540,43 @@ void vkd3d_shader_free_messages(char *messages) vkd3d_free(messages); } +static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, + const struct shader_signature *src) +{ + unsigned int i; + + signature->element_count = src->element_count; + if (!src->elements) + { + assert(!signature->element_count); + signature->elements = NULL; + return true; + } + + if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) + return false; + + for (i = 0; i < signature->element_count; ++i) + { + struct vkd3d_shader_signature_element *d = &signature->elements[i]; + struct signature_element *e = &src->elements[i]; + + d->semantic_name = e->semantic_name; + d->semantic_index = e->semantic_index; + d->stream_index = e->stream_index; + d->sysval_semantic = e->sysval_semantic; + d->component_type = e->component_type; + d->register_index = e->register_index; + if (e->register_count > 1) + FIXME("Arrayed elements are not supported yet.\n"); + d->mask = e->mask; + d->used_mask = e->used_mask; + d->min_precision = e->min_precision; + } + + return true; +} + struct vkd3d_shader_scan_context { struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; @@ -1070,6 +1121,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info struct 
vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser *parser) { struct vkd3d_shader_scan_descriptor_info *scan_descriptor_info; + struct vkd3d_shader_scan_signature_info *signature_info; struct vkd3d_shader_instruction *instruction; struct vkd3d_shader_scan_context context; int ret = VKD3D_OK; @@ -1080,6 +1132,7 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info scan_descriptor_info->descriptors = NULL; scan_descriptor_info->descriptor_count = 0; } + signature_info = vkd3d_find_struct(compile_info->next, SCAN_SIGNATURE_INFO); vkd3d_shader_scan_context_init(&context, compile_info, scan_descriptor_info, message_context); @@ -1099,6 +1152,21 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info } } + if (!ret && signature_info) + { + if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, + &parser->shader_desc.output_signature) + || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, + &parser->shader_desc.patch_constant_signature)) + { + vkd3d_shader_free_scan_signature_info(signature_info); + if (scan_descriptor_info) + vkd3d_shader_free_scan_descriptor_info(scan_descriptor_info); + ret = VKD3D_ERROR_OUT_OF_MEMORY; + } + } + vkd3d_shader_scan_context_cleanup(&context); return ret; } @@ -1139,6 +1207,24 @@ static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, return ret; } +static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = scan_with_parser(compile_info, message_context, parser); + vkd3d_shader_parser_destroy(parser); + + return ret; +} + int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct vkd3d_shader_message_context message_context; @@ -1152,6 +1238,8 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char if ((ret = vkd3d_shader_validate_compile_info(compile_info, false)) < 0) return ret; + init_scan_signature_info(compile_info); + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); switch (compile_info->source_type) @@ -1169,6 +1257,10 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char ret = scan_d3dbc(compile_info, &message_context); break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = scan_dxil(compile_info, &message_context); + break; + default: ERR("Unsupported source type %#x.\n", compile_info->source_type); ret = VKD3D_ERROR_INVALID_ARGUMENT; @@ -1291,6 +1383,24 @@ static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_ return VKD3D_ERROR; } +static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_shader_parser *parser; + int ret; + + if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) + { + WARN("Failed to initialise shader parser.\n"); + return ret; + } + + ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); + + vkd3d_shader_parser_destroy(parser); + return ret; +} + 
int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { @@ -1305,6 +1415,8 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, if ((ret = vkd3d_shader_validate_compile_info(compile_info, true)) < 0) return ret; + init_scan_signature_info(compile_info); + vkd3d_shader_message_context_init(&message_context, compile_info->log_level); switch (compile_info->source_type) @@ -1321,6 +1433,10 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, ret = compile_d3d_bytecode(compile_info, out, &message_context); break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = compile_dxbc_dxil(compile_info, out, &message_context); + break; + default: vkd3d_unreachable(); } @@ -1339,6 +1455,15 @@ void vkd3d_shader_free_scan_descriptor_info(struct vkd3d_shader_scan_descriptor_ vkd3d_free(scan_descriptor_info->descriptors); } +void vkd3d_shader_free_scan_signature_info(struct vkd3d_shader_scan_signature_info *info) +{ + TRACE("info %p.\n", info); + + vkd3d_shader_free_shader_signature(&info->input); + vkd3d_shader_free_shader_signature(&info->output); + vkd3d_shader_free_shader_signature(&info->patch_constant); +} + void vkd3d_shader_free_shader_code(struct vkd3d_shader_code *shader_code) { TRACE("shader_code %p.\n", shader_code); @@ -1401,43 +1526,6 @@ void vkd3d_shader_free_root_signature(struct vkd3d_shader_versioned_root_signatu desc->version = 0; } -static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_signature *signature, - const struct shader_signature *src) -{ - unsigned int i; - - signature->element_count = src->element_count; - if (!src->elements) - { - assert(!signature->element_count); - signature->elements = NULL; - return true; - } - - if (!(signature->elements = vkd3d_calloc(signature->element_count, sizeof(*signature->elements)))) - return false; - - for (i = 0; i < signature->element_count; ++i) - { - struct vkd3d_shader_signature_element *d = &signature->elements[i]; - struct signature_element *e = &src->elements[i]; - - d->semantic_name = e->semantic_name; - d->semantic_index = e->semantic_index; - d->stream_index = e->stream_index; - d->sysval_semantic = e->sysval_semantic; - d->component_type = e->component_type; - d->register_index = e->register_index; - if (e->register_count > 1) - FIXME("Arrayed elements are not supported yet.\n"); - d->mask = e->mask; - d->used_mask = e->used_mask; - d->min_precision = e->min_precision; - } - - return true; -} - void shader_signature_cleanup(struct shader_signature *signature) { vkd3d_free(signature->elements); @@ -1526,6 +1614,7 @@ const enum vkd3d_shader_source_type *vkd3d_shader_get_supported_source_types(uns VKD3D_SHADER_SOURCE_DXBC_TPF, VKD3D_SHADER_SOURCE_HLSL, VKD3D_SHADER_SOURCE_D3D_BYTECODE, + VKD3D_SHADER_SOURCE_DXBC_DXIL, }; TRACE("count %p.\n", count); @@ -1564,6 +1653,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( switch (source_type) { + case VKD3D_SHADER_SOURCE_DXBC_DXIL: case VKD3D_SHADER_SOURCE_DXBC_TPF: *count = ARRAY_SIZE(dxbc_tpf_types); return dxbc_tpf_types; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 406d53a3391..85fca964227 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -145,8 +145,21 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE = 7002, 
VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE = 7003, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY = 7004, + VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX = 7005, + VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC = 7006, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS= 7300, + + VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY = 8000, + VKD3D_SHADER_ERROR_DXIL_INVALID_SIZE = 8001, + VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_OFFSET = 8002, + VKD3D_SHADER_ERROR_DXIL_INVALID_CHUNK_SIZE = 8003, + VKD3D_SHADER_ERROR_DXIL_INVALID_BITCODE = 8004, + + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_MAGIC_NUMBER = 8300, + VKD3D_SHADER_WARNING_DXIL_UNKNOWN_SHADER_TYPE = 8301, + VKD3D_SHADER_WARNING_DXIL_INVALID_BLOCK_LENGTH = 8302, + VKD3D_SHADER_WARNING_DXIL_INVALID_MODULE_LENGTH = 8303, }; enum vkd3d_shader_opcode @@ -802,6 +815,7 @@ struct signature_element struct shader_signature { struct signature_element *elements; + size_t elements_capacity; unsigned int element_count; }; @@ -811,9 +825,12 @@ struct vkd3d_shader_desc { const uint32_t *byte_code; size_t byte_code_size; + bool is_dxil; struct shader_signature input_signature; struct shader_signature output_signature; struct shader_signature patch_constant_signature; + + uint32_t temp_count; }; struct vkd3d_shader_register_semantic @@ -1167,6 +1184,8 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); +int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, + struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); void free_shader_desc(struct vkd3d_shader_desc *desc); @@ -1339,6 +1358,7 @@ static inline void *vkd3d_find_struct_(const struct vkd3d_struct *chain, } #define VKD3D_DXBC_HEADER_SIZE (8 * sizeof(uint32_t)) +#define VKD3D_DXBC_CHUNK_ALIGNMENT sizeof(uint32_t) #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 32439eec7eb..53cb5d9582c 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -26,6 +26,7 @@ static HRESULT d3d12_fence_signal(struct d3d12_fence *fence, uint64_t value, VkF static void d3d12_fence_signal_timeline_semaphore(struct d3d12_fence *fence, uint64_t timeline_value); static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_queue, struct d3d12_fence *fence, uint64_t value); +static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue); static HRESULT d3d12_command_queue_flush_ops(struct d3d12_command_queue *queue, bool *flushed_any); static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue *queue, bool *flushed_any); @@ -3624,7 +3625,7 @@ static HRESULT d3d12_command_list_allocate_transfer_buffer(struct d3d12_command_ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_command_list *list, struct d3d12_resource *dst_resource, unsigned int dst_sub_resource_idx, const struct vkd3d_format *dst_format, struct d3d12_resource *src_resource, - unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format) + unsigned int src_sub_resource_idx, const struct vkd3d_format *src_format, unsigned int layer_count) { const struct 
vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; const D3D12_RESOURCE_DESC *dst_desc = &dst_resource->desc; @@ -3651,6 +3652,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com buffer_image_copy.bufferImageHeight = 0; vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, src_format, src_sub_resource_idx, src_desc->MipLevels); + buffer_image_copy.imageSubresource.layerCount = layer_count; src_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; buffer_image_copy.imageOffset.x = 0; buffer_image_copy.imageOffset.y = 0; @@ -3658,7 +3660,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com vk_extent_3d_from_d3d12_miplevel(&buffer_image_copy.imageExtent, src_desc, src_miplevel_idx); buffer_size = src_format->byte_count * buffer_image_copy.imageExtent.width * - buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth; + buffer_image_copy.imageExtent.height * buffer_image_copy.imageExtent.depth * layer_count; if (FAILED(hr = d3d12_command_list_allocate_transfer_buffer(list, buffer_size, &transfer_buffer))) { ERR("Failed to allocate transfer buffer, hr %#x.\n", hr); @@ -3684,6 +3686,7 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com vk_image_subresource_layers_from_d3d12(&buffer_image_copy.imageSubresource, dst_format, dst_sub_resource_idx, dst_desc->MipLevels); + buffer_image_copy.imageSubresource.layerCount = layer_count; dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == @@ -3813,7 +3816,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic { d3d12_command_list_copy_incompatible_texture_region(list, dst_resource, dst->u.SubresourceIndex, dst_format, - src_resource, src->u.SubresourceIndex, src_format); + src_resource, src->u.SubresourceIndex, src_format, 1); return; } @@ -3835,6 +3838,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm { struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList2(iface); struct d3d12_resource *dst_resource, *src_resource; + const struct vkd3d_format *dst_format, *src_format; const struct vkd3d_vk_device_procs *vk_procs; VkBufferCopy vk_buffer_copy; VkImageCopy vk_image_copy; @@ -3867,16 +3871,29 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm else { layer_count = d3d12_resource_desc_get_layer_count(&dst_resource->desc); + dst_format = dst_resource->format; + src_format = src_resource->format; assert(d3d12_resource_is_texture(dst_resource)); assert(d3d12_resource_is_texture(src_resource)); assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); + if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) + { + for (i = 0; i < dst_resource->desc.MipLevels; ++i) + { + d3d12_command_list_copy_incompatible_texture_region(list, + dst_resource, i, dst_format, + src_resource, i, src_format, layer_count); + } + return; + } + for (i = 0; i < dst_resource->desc.MipLevels; ++i) { vk_image_copy_from_d3d12(&vk_image_copy, i, i, &src_resource->desc, &dst_resource->desc, - src_resource->format, dst_resource->format, NULL, 0, 0, 0); + src_format, dst_format, NULL, 0, 0, 0); vk_image_copy.dstSubresource.layerCount = layer_count; vk_image_copy.srcSubresource.layerCount = layer_count; VK_CALL(vkCmdCopyImage(list->vk_command_buffer, 
src_resource->u.vk_image, @@ -6063,8 +6080,35 @@ static ULONG STDMETHODCALLTYPE d3d12_command_queue_AddRef(ID3D12CommandQueue *if return refcount; } +static void d3d12_command_queue_destroy_op(struct vkd3d_cs_op_data *op) +{ + switch (op->opcode) + { + case VKD3D_CS_OP_WAIT: + d3d12_fence_decref(op->u.wait.fence); + break; + + case VKD3D_CS_OP_SIGNAL: + d3d12_fence_decref(op->u.signal.fence); + break; + + case VKD3D_CS_OP_EXECUTE: + vkd3d_free(op->u.execute.buffers); + break; + + case VKD3D_CS_OP_UPDATE_MAPPINGS: + case VKD3D_CS_OP_COPY_MAPPINGS: + break; + } +} + static void d3d12_command_queue_op_array_destroy(struct d3d12_command_queue_op_array *array) { + unsigned int i; + + for (i = 0; i < array->count; ++i) + d3d12_command_queue_destroy_op(&array->ops[i]); + vkd3d_free(array->ops); } @@ -6162,17 +6206,131 @@ static struct vkd3d_cs_op_data *d3d12_command_queue_op_array_require_space(struc return &array->ops[array->count++]; } +static bool clone_array_parameter(void **dst, const void *src, size_t elem_size, unsigned int count) +{ + void *buffer; + + *dst = NULL; + if (src) + { + if (!(buffer = vkd3d_calloc(count, elem_size))) + return false; + memcpy(buffer, src, count * elem_size); + *dst = buffer; + } + return true; +} + +static void update_mappings_cleanup(struct vkd3d_cs_update_mappings *update_mappings) +{ + vkd3d_free(update_mappings->region_start_coordinates); + vkd3d_free(update_mappings->region_sizes); + vkd3d_free(update_mappings->range_flags); + vkd3d_free(update_mappings->heap_range_offsets); + vkd3d_free(update_mappings->range_tile_counts); +} + static void STDMETHODCALLTYPE d3d12_command_queue_UpdateTileMappings(ID3D12CommandQueue *iface, ID3D12Resource *resource, UINT region_count, const D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates, const D3D12_TILE_REGION_SIZE *region_sizes, ID3D12Heap *heap, UINT range_count, const D3D12_TILE_RANGE_FLAGS *range_flags, - UINT *heap_range_offsets, UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) + const UINT *heap_range_offsets, const UINT *range_tile_counts, D3D12_TILE_MAPPING_FLAGS flags) { - FIXME("iface %p, resource %p, region_count %u, region_start_coordinates %p, " + struct d3d12_resource *resource_impl = unsafe_impl_from_ID3D12Resource(resource); + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct d3d12_heap *heap_impl = unsafe_impl_from_ID3D12Heap(heap); + struct vkd3d_cs_update_mappings update_mappings = {0}; + struct vkd3d_cs_op_data *op; + + TRACE("iface %p, resource %p, region_count %u, region_start_coordinates %p, " "region_sizes %p, heap %p, range_count %u, range_flags %p, heap_range_offsets %p, " - "range_tile_counts %p, flags %#x stub!\n", + "range_tile_counts %p, flags %#x.\n", iface, resource, region_count, region_start_coordinates, region_sizes, heap, range_count, range_flags, heap_range_offsets, range_tile_counts, flags); + + if (!region_count || !range_count) + return; + + if (!command_queue->supports_sparse_binding) + { + FIXME("Command queue %p does not support sparse binding.\n", command_queue); + return; + } + + if (!resource_impl->tiles.subresource_count) + { + WARN("Resource %p is not a tiled resource.\n", resource_impl); + return; + } + + if (region_count > 1 && !region_start_coordinates) + { + WARN("Region start coordinates must not be NULL when region count is > 1.\n"); + return; + } + + if (range_count > 1 && !range_tile_counts) + { + WARN("Range tile counts must not be NULL when range count is > 1.\n"); + return; + } + + update_mappings.resource 
= resource_impl; + update_mappings.heap = heap_impl; + if (!clone_array_parameter((void **)&update_mappings.region_start_coordinates, + region_start_coordinates, sizeof(*region_start_coordinates), region_count)) + { + ERR("Failed to allocate region start coordinates.\n"); + return; + } + if (!clone_array_parameter((void **)&update_mappings.region_sizes, + region_sizes, sizeof(*region_sizes), region_count)) + { + ERR("Failed to allocate region sizes.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.range_flags, + range_flags, sizeof(*range_flags), range_count)) + { + ERR("Failed to allocate range flags.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.heap_range_offsets, + heap_range_offsets, sizeof(*heap_range_offsets), range_count)) + { + ERR("Failed to allocate heap range offsets.\n"); + goto free_clones; + } + if (!clone_array_parameter((void **)&update_mappings.range_tile_counts, + range_tile_counts, sizeof(*range_tile_counts), range_count)) + { + ERR("Failed to allocate range tile counts.\n"); + goto free_clones; + } + update_mappings.region_count = region_count; + update_mappings.range_count = range_count; + update_mappings.flags = flags; + + vkd3d_mutex_lock(&command_queue->op_mutex); + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to add op.\n"); + goto unlock_mutex; + } + + op->opcode = VKD3D_CS_OP_UPDATE_MAPPINGS; + op->u.update_mappings = update_mappings; + + d3d12_command_queue_submit_locked(command_queue); + + vkd3d_mutex_unlock(&command_queue->op_mutex); + return; + +unlock_mutex: + vkd3d_mutex_unlock(&command_queue->op_mutex); +free_clones: + update_mappings_cleanup(&update_mappings); } static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12CommandQueue *iface, @@ -6183,10 +6341,34 @@ static void STDMETHODCALLTYPE d3d12_command_queue_CopyTileMappings(ID3D12Command const D3D12_TILE_REGION_SIZE *region_size, D3D12_TILE_MAPPING_FLAGS flags) { - FIXME("iface %p, dst_resource %p, dst_region_start_coordinate %p, " - "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x stub!\n", + struct d3d12_resource *dst_resource_impl = impl_from_ID3D12Resource(dst_resource); + struct d3d12_resource *src_resource_impl = impl_from_ID3D12Resource(src_resource); + struct d3d12_command_queue *command_queue = impl_from_ID3D12CommandQueue(iface); + struct vkd3d_cs_op_data *op; + + TRACE("iface %p, dst_resource %p, dst_region_start_coordinate %p, " + "src_resource %p, src_region_start_coordinate %p, region_size %p, flags %#x.\n", iface, dst_resource, dst_region_start_coordinate, src_resource, src_region_start_coordinate, region_size, flags); + + vkd3d_mutex_lock(&command_queue->op_mutex); + + if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) + { + ERR("Failed to add op.\n"); + return; + } + op->opcode = VKD3D_CS_OP_COPY_MAPPINGS; + op->u.copy_mappings.dst_resource = dst_resource_impl; + op->u.copy_mappings.src_resource = src_resource_impl; + op->u.copy_mappings.dst_region_start_coordinate = *dst_region_start_coordinate; + op->u.copy_mappings.src_region_start_coordinate = *src_region_start_coordinate; + op->u.copy_mappings.region_size = *region_size; + op->u.copy_mappings.flags = flags; + + d3d12_command_queue_submit_locked(command_queue); + + vkd3d_mutex_unlock(&command_queue->op_mutex); } static void d3d12_command_queue_execute(struct d3d12_command_queue *command_queue, @@ -6214,8 +6396,6 @@ static void 
d3d12_command_queue_execute(struct d3d12_command_queue *command_queu ERR("Failed to submit queue(s), vr %d.\n", vr); vkd3d_queue_release(vkd3d_queue); - - vkd3d_free(buffers); } static void d3d12_command_queue_submit_locked(struct d3d12_command_queue *queue) @@ -6273,7 +6453,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { ERR("Failed to add op.\n"); - return; + goto done; } op->opcode = VKD3D_CS_OP_EXECUTE; op->u.execute.buffers = buffers; @@ -6281,6 +6461,7 @@ static void STDMETHODCALLTYPE d3d12_command_queue_ExecuteCommandLists(ID3D12Comm d3d12_command_queue_submit_locked(command_queue); +done: vkd3d_mutex_unlock(&command_queue->op_mutex); return; } @@ -6348,6 +6529,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Signal(ID3D12CommandQueue * if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { + ERR("Failed to add op.\n"); hr = E_OUTOFMEMORY; goto done; } @@ -6686,6 +6868,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_queue_Wait(ID3D12CommandQueue *if if (!(op = d3d12_command_queue_op_array_require_space(&command_queue->op_queue))) { + ERR("Failed to add op.\n"); hr = E_OUTOFMEMORY; goto done; } @@ -6922,22 +7105,31 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * return d3d12_command_queue_fixup_after_flush_locked(queue); } d3d12_command_queue_wait_locked(queue, fence, op->u.wait.value); - d3d12_fence_decref(fence); break; case VKD3D_CS_OP_SIGNAL: d3d12_command_queue_signal(queue, op->u.signal.fence, op->u.signal.value); - d3d12_fence_decref(op->u.signal.fence); break; case VKD3D_CS_OP_EXECUTE: d3d12_command_queue_execute(queue, op->u.execute.buffers, op->u.execute.buffer_count); break; + case VKD3D_CS_OP_UPDATE_MAPPINGS: + FIXME("Tiled resource binding is not supported yet.\n"); + update_mappings_cleanup(&op->u.update_mappings); + break; + + case VKD3D_CS_OP_COPY_MAPPINGS: + FIXME("Tiled resource mapping copying is not supported yet.\n"); + break; + default: vkd3d_unreachable(); } + d3d12_command_queue_destroy_op(op); + *flushed_any |= true; } @@ -7000,6 +7192,8 @@ static HRESULT d3d12_command_queue_init(struct d3d12_command_queue *queue, if (FAILED(hr = vkd3d_fence_worker_start(&queue->fence_worker, queue->vkd3d_queue, device))) goto fail_destroy_op_mutex; + queue->supports_sparse_binding = !!(queue->vkd3d_queue->vk_queue_flags & VK_QUEUE_SPARSE_BINDING_BIT); + d3d12_device_add_ref(queue->device = device); return S_OK; diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 4263dcf4184..b9a8943cc08 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -1464,6 +1464,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->device_limits = physical_device_info->properties2.properties.limits; vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; + vulkan_info->sparse_binding = features->sparseBinding; + vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; vulkan_info->transform_feedback_queries = physical_device_info->xfb_properties.transformFeedbackQueries; vulkan_info->uav_read_without_format = features->shaderStorageImageReadWithoutFormat; diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index 
ea7b6859cc1..4c07d326504 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -779,6 +779,7 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, VkImageFormatListCreateInfoKHR format_list; const struct vkd3d_format *format; VkImageCreateInfo image_info; + uint32_t count; VkResult vr; if (resource) @@ -914,6 +915,20 @@ static HRESULT vkd3d_create_image(struct d3d12_device *device, if (resource && image_info.tiling == VK_IMAGE_TILING_LINEAR) resource->flags |= VKD3D_RESOURCE_LINEAR_TILING; + if (sparse_resource) + { + count = 0; + VK_CALL(vkGetPhysicalDeviceSparseImageFormatProperties(device->vk_physical_device, image_info.format, + image_info.imageType, image_info.samples, image_info.usage, image_info.tiling, &count, NULL)); + + if (!count) + { + FIXME("Sparse images are not supported with format %u, type %u, samples %u, usage %#x.\n", + image_info.format, image_info.imageType, image_info.samples, image_info.usage); + return E_INVALIDARG; + } + } + if ((vr = VK_CALL(vkCreateImage(device->vk_device, &image_info, NULL, vk_image))) < 0) WARN("Failed to create Vulkan image, vr %d.\n", vr); @@ -928,6 +943,7 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, D3D12_RESOURCE_DESC validated_desc; VkMemoryRequirements requirements; VkImage vk_image; + bool tiled; HRESULT hr; assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); @@ -940,8 +956,10 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, desc = &validated_desc; } + tiled = desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE; + /* XXX: We have to create an image to get its memory requirements. */ - if (SUCCEEDED(hr = vkd3d_create_image(device, &heap_properties, 0, desc, NULL, &vk_image))) + if (SUCCEEDED(hr = vkd3d_create_image(device, tiled ? NULL : &heap_properties, 0, desc, NULL, &vk_image))) { VK_CALL(vkGetImageMemoryRequirements(device->vk_device, vk_image, &requirements)); VK_CALL(vkDestroyImage(device->vk_device, vk_image, NULL)); @@ -1039,12 +1057,12 @@ static void d3d12_resource_get_level_box(const struct d3d12_resource *resource, box->back = d3d12_resource_desc_get_depth(&resource->desc, level); } -/* ID3D12Resource */ -static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) +static void d3d12_resource_init_tiles(struct d3d12_resource *resource) { - return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); + resource->tiles.subresource_count = d3d12_resource_desc_get_sub_resource_count(&resource->desc); } +/* ID3D12Resource */ static HRESULT STDMETHODCALLTYPE d3d12_resource_QueryInterface(ID3D12Resource *iface, REFIID riid, void **object) { @@ -1661,6 +1679,21 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC *desc, struct d3d return E_INVALIDARG; } + if (desc->Layout == D3D12_TEXTURE_LAYOUT_64KB_UNDEFINED_SWIZZLE) + { + if (desc->Dimension == D3D12_RESOURCE_DIMENSION_TEXTURE3D && !device->vk_info.sparse_residency_3d) + { + WARN("The device does not support tiled 3D images.\n"); + return E_INVALIDARG; + } + if (format->plane_count > 1) + { + WARN("Invalid format %#x. 
D3D12 does not support multiplanar formats for tiled resources.\n", + format->dxgi_format); + return E_INVALIDARG; + } + } + if (!d3d12_resource_validate_texture_format(desc, format) || !d3d12_resource_validate_texture_alignment(desc, format)) return E_INVALIDARG; @@ -1722,6 +1755,12 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 resource->desc = *desc; + if (!heap_properties && !device->vk_info.sparse_binding) + { + WARN("The device does not support tiled images.\n"); + return E_INVALIDARG; + } + if (heap_properties && !d3d12_resource_validate_heap_properties(resource, heap_properties, initial_state)) return E_INVALIDARG; @@ -1787,6 +1826,8 @@ static HRESULT d3d12_resource_init(struct d3d12_resource *resource, struct d3d12 resource->heap = NULL; resource->heap_offset = 0; + memset(&resource->tiles, 0, sizeof(resource->tiles)); + if (FAILED(hr = vkd3d_private_store_init(&resource->private_store))) { d3d12_resource_destroy(resource, device); @@ -1972,6 +2013,8 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, desc, initial_state, optimized_clear_value, &object))) return hr; + d3d12_resource_init_tiles(object); + TRACE("Created reserved resource %p.\n", object); *resource = object; diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 5e46b467252..2d8138245d8 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -20,6 +20,7 @@ #include "vkd3d_private.h" #include "vkd3d_shaders.h" +#include "vkd3d_shader_utils.h" /* ID3D12RootSignature */ static inline struct d3d12_root_signature *impl_from_ID3D12RootSignature(ID3D12RootSignature *iface) @@ -1978,14 +1979,14 @@ static HRESULT create_shader_stage(struct d3d12_device *device, compile_info.next = shader_interface; compile_info.source.code = code->pShaderBytecode; compile_info.source.size = code->BytecodeLength; - compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; compile_info.options = options; compile_info.option_count = ARRAY_SIZE(options); compile_info.log_level = VKD3D_SHADER_LOG_NONE; compile_info.source_name = NULL; - if ((ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) + if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0 + || (ret = vkd3d_shader_compile(&compile_info, &spirv, NULL)) < 0) { WARN("Failed to compile shader, vkd3d result %d.\n", ret); return hresult_from_vkd3d_result(ret); @@ -2008,6 +2009,7 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER struct vkd3d_shader_scan_descriptor_info *descriptor_info) { struct vkd3d_shader_compile_info compile_info; + enum vkd3d_result ret; const struct vkd3d_shader_compile_option options[] = { @@ -2019,13 +2021,15 @@ static int vkd3d_scan_dxbc(const struct d3d12_device *device, const D3D12_SHADER compile_info.next = descriptor_info; compile_info.source.code = code->pShaderBytecode; compile_info.source.size = code->BytecodeLength; - compile_info.source_type = VKD3D_SHADER_SOURCE_DXBC_TPF; compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY; compile_info.options = options; compile_info.option_count = ARRAY_SIZE(options); compile_info.log_level = VKD3D_SHADER_LOG_NONE; compile_info.source_name = NULL; + if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source, &compile_info.source_type, NULL)) < 0) + return ret; + return vkd3d_shader_scan(&compile_info, NULL); } diff --git 
a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index b0150754434..c5259420acf 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -147,9 +147,12 @@ struct vkd3d_vulkan_info unsigned int max_vertex_attrib_divisor; VkPhysicalDeviceLimits device_limits; - VkPhysicalDeviceSparseProperties sparse_properties; struct vkd3d_device_descriptor_limits descriptor_limits; + VkPhysicalDeviceSparseProperties sparse_properties; + bool sparse_binding; + bool sparse_residency_3d; + VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT texel_buffer_alignment_properties; unsigned int shader_extension_count; @@ -670,6 +673,11 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface); #define VKD3D_RESOURCE_DEDICATED_HEAP 0x00000008 #define VKD3D_RESOURCE_LINEAR_TILING 0x00000010 +struct d3d12_resource_tile_info +{ + unsigned int subresource_count; +}; + /* ID3D12Resource */ struct d3d12_resource { @@ -698,9 +706,16 @@ struct d3d12_resource struct d3d12_device *device; + struct d3d12_resource_tile_info tiles; + struct vkd3d_private_store private_store; }; +static inline struct d3d12_resource *impl_from_ID3D12Resource(ID3D12Resource *iface) +{ + return CONTAINING_RECORD(iface, struct d3d12_resource, ID3D12Resource_iface); +} + static inline bool d3d12_resource_is_buffer(const struct d3d12_resource *resource) { return resource->desc.Dimension == D3D12_RESOURCE_DIMENSION_BUFFER; @@ -1454,6 +1469,8 @@ enum vkd3d_cs_op VKD3D_CS_OP_WAIT, VKD3D_CS_OP_SIGNAL, VKD3D_CS_OP_EXECUTE, + VKD3D_CS_OP_UPDATE_MAPPINGS, + VKD3D_CS_OP_COPY_MAPPINGS, }; struct vkd3d_cs_wait @@ -1474,6 +1491,30 @@ struct vkd3d_cs_execute unsigned int buffer_count; }; +struct vkd3d_cs_update_mappings +{ + struct d3d12_resource *resource; + struct d3d12_heap *heap; + D3D12_TILED_RESOURCE_COORDINATE *region_start_coordinates; + D3D12_TILE_REGION_SIZE *region_sizes; + D3D12_TILE_RANGE_FLAGS *range_flags; + UINT *heap_range_offsets; + UINT *range_tile_counts; + UINT region_count; + UINT range_count; + D3D12_TILE_MAPPING_FLAGS flags; +}; + +struct vkd3d_cs_copy_mappings +{ + struct d3d12_resource *dst_resource; + struct d3d12_resource *src_resource; + D3D12_TILED_RESOURCE_COORDINATE dst_region_start_coordinate; + D3D12_TILED_RESOURCE_COORDINATE src_region_start_coordinate; + D3D12_TILE_REGION_SIZE region_size; + D3D12_TILE_MAPPING_FLAGS flags; +}; + struct vkd3d_cs_op_data { enum vkd3d_cs_op opcode; @@ -1482,6 +1523,8 @@ struct vkd3d_cs_op_data struct vkd3d_cs_wait wait; struct vkd3d_cs_signal signal; struct vkd3d_cs_execute execute; + struct vkd3d_cs_update_mappings update_mappings; + struct vkd3d_cs_copy_mappings copy_mappings; } u; }; @@ -1519,6 +1562,8 @@ struct d3d12_command_queue * set, aux_op_queue.count must be zero. */ struct d3d12_command_queue_op_array aux_op_queue; + bool supports_sparse_binding; + struct vkd3d_private_store private_store; }; -- 2.40.1
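Note for reviewers: the key behavioural change in state.c above is that create_shader_stage() and vkd3d_scan_dxbc() no longer hard-code VKD3D_SHADER_SOURCE_DXBC_TPF; they now probe the DXBC container with vkd3d_shader_parse_dxbc_source_type() and only then compile or scan. Below is a minimal sketch of that probe-then-compile flow, assuming an in-tree caller that can include the private vkd3d_shader_utils.h header; the compile_dxbc_to_spirv() helper name is illustrative and not part of the patch.

#include "vkd3d_shader.h"
#include "vkd3d_shader_utils.h"

/* Illustrative helper: compile a DXBC blob (TPF or DXIL) to SPIR-V, mirroring
 * the probe-then-compile sequence used by create_shader_stage() in state.c. */
static int compile_dxbc_to_spirv(const void *bytecode, size_t bytecode_size,
        struct vkd3d_shader_code *spirv)
{
    struct vkd3d_shader_compile_info compile_info = {0};
    int ret;

    compile_info.type = VKD3D_SHADER_STRUCTURE_TYPE_COMPILE_INFO;
    compile_info.next = NULL;
    compile_info.source.code = bytecode;
    compile_info.source.size = bytecode_size;
    compile_info.target_type = VKD3D_SHADER_TARGET_SPIRV_BINARY;
    compile_info.log_level = VKD3D_SHADER_LOG_NONE;
    compile_info.source_name = NULL;

    /* Inspect the DXBC sections to pick the TPF or DXIL parser, then hand the
     * blob to the regular compile entry point. */
    if ((ret = vkd3d_shader_parse_dxbc_source_type(&compile_info.source,
            &compile_info.source_type, NULL)) < 0)
        return ret;

    return vkd3d_shader_compile(&compile_info, spirv, NULL);
}

On success the caller owns the SPIR-V code and releases it with vkd3d_shader_free_shader_code(), as the existing pipeline-state code already does.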