From 6a483e26d5143a3faaa3365f7cf728eec9b231d9 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to f318e565f295d9f439e0e9ec52ba28835b33a9ce. --- libs/vkd3d/include/private/vkd3d_common.h | 76 +- libs/vkd3d/include/private/vkd3d_memory.h | 3 +- libs/vkd3d/include/vkd3d_shader.h | 243 +++- libs/vkd3d/libs/vkd3d-common/blob.c | 1 + libs/vkd3d/libs/vkd3d-common/debug.c | 14 +- libs/vkd3d/libs/vkd3d-shader/checksum.c | 2 +- libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 29 +- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 718 ++++++---- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 6 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 160 ++- libs/vkd3d/libs/vkd3d-shader/fx.c | 1229 ++++++++++++++-- libs/vkd3d/libs/vkd3d-shader/glsl.c | 198 ++- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 379 ++++- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 167 ++- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 136 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 1259 +++++++++++++---- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 1235 +++++++++++++--- .../libs/vkd3d-shader/hlsl_constant_ops.c | 110 +- libs/vkd3d/libs/vkd3d-shader/ir.c | 699 +++++++-- libs/vkd3d/libs/vkd3d-shader/preproc.h | 2 +- libs/vkd3d/libs/vkd3d-shader/preproc.l | 5 +- libs/vkd3d/libs/vkd3d-shader/preproc.y | 2 +- libs/vkd3d/libs/vkd3d-shader/spirv.c | 629 ++++---- libs/vkd3d/libs/vkd3d-shader/tpf.c | 625 +++++--- .../libs/vkd3d-shader/vkd3d_shader_main.c | 42 +- .../libs/vkd3d-shader/vkd3d_shader_private.h | 34 +- libs/vkd3d/libs/vkd3d/cache.c | 9 +- libs/vkd3d/libs/vkd3d/command.c | 595 ++++---- libs/vkd3d/libs/vkd3d/device.c | 190 ++- libs/vkd3d/libs/vkd3d/resource.c | 58 +- libs/vkd3d/libs/vkd3d/state.c | 262 +++- libs/vkd3d/libs/vkd3d/utils.c | 4 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 8 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 15 +- 34 files changed, 6981 insertions(+), 2163 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 
a9d709d10fe..c62dc00415f 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -46,12 +46,22 @@ #define STATIC_ASSERT(e) extern void __VKD3D_STATIC_ASSERT__(int [(e) ? 1 : -1]) +#define VKD3D_ASSERT(cond) \ + do { \ + if (!(cond)) \ + ERR("Failed assertion: %s\n", #cond); \ + } while (0) + #define MEMBER_SIZE(t, m) sizeof(((t *)0)->m) #define VKD3D_MAKE_TAG(ch0, ch1, ch2, ch3) \ ((uint32_t)(ch0) | ((uint32_t)(ch1) << 8) \ | ((uint32_t)(ch2) << 16) | ((uint32_t)(ch3) << 24)) +#define VKD3D_EXPAND(x) x +#define VKD3D_STRINGIFY(x) #x +#define VKD3D_EXPAND_AND_STRINGIFY(x) VKD3D_EXPAND(VKD3D_STRINGIFY(x)) + #define TAG_AON9 VKD3D_MAKE_TAG('A', 'o', 'n', '9') #define TAG_DXBC VKD3D_MAKE_TAG('D', 'X', 'B', 'C') #define TAG_DXIL VKD3D_MAKE_TAG('D', 'X', 'I', 'L') @@ -98,17 +108,11 @@ static inline uint64_t align(uint64_t addr, size_t alignment) # define VKD3D_UNREACHABLE (void)0 #endif /* __GNUC__ */ -VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsigned int line) -{ - fprintf(stderr, "%s:%u: Aborting, reached unreachable code.\n", filename, line); - abort(); -} - -#ifdef NDEBUG -#define vkd3d_unreachable() VKD3D_UNREACHABLE -#else -#define vkd3d_unreachable() vkd3d_unreachable_(__FILE__, __LINE__) -#endif +#define vkd3d_unreachable() \ + do { \ + ERR("%s:%u: Unreachable code reached.\n", __FILE__, __LINE__); \ + VKD3D_UNREACHABLE; \ + } while (0) #ifdef VKD3D_NO_TRACE_MESSAGES #define TRACE(args...) do { } while (0) @@ -118,11 +122,19 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig #ifdef VKD3D_NO_DEBUG_MESSAGES #define WARN(args...) do { } while (0) #define FIXME(args...) do { } while (0) +#define WARN_ON() (false) +#define FIXME_ONCE(args...) do { } while (0) +#endif + +#ifdef VKD3D_NO_ERROR_MESSAGES +#define ERR(args...) do { } while (0) +#define MESSAGE(args...) 
do { } while (0) #endif enum vkd3d_dbg_level { VKD3D_DBG_LEVEL_NONE, + VKD3D_DBG_LEVEL_MESSAGE, VKD3D_DBG_LEVEL_ERR, VKD3D_DBG_LEVEL_FIXME, VKD3D_DBG_LEVEL_WARN, @@ -143,7 +155,7 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); #define VKD3D_DBG_LOG(level) \ do { \ const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ - VKD3D_DBG_PRINTF + VKD3D_DBG_PRINTF_##level #define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ do { \ @@ -151,24 +163,50 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ vkd3d_dbg_next_time = true; \ - VKD3D_DBG_PRINTF + VKD3D_DBG_PRINTF_##level #define VKD3D_DBG_PRINTF(...) \ vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) +#define VKD3D_DBG_PRINTF_TRACE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) +#define VKD3D_DBG_PRINTF_WARN(...) VKD3D_DBG_PRINTF(__VA_ARGS__) +#define VKD3D_DBG_PRINTF_FIXME(...) VKD3D_DBG_PRINTF(__VA_ARGS__) +#define VKD3D_DBG_PRINTF_MESSAGE(...) VKD3D_DBG_PRINTF(__VA_ARGS__) + +#ifdef VKD3D_ABORT_ON_ERR +#define VKD3D_DBG_PRINTF_ERR(...) \ + vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); \ + abort(); \ + } while (0) +#else +#define VKD3D_DBG_PRINTF_ERR(...) VKD3D_DBG_PRINTF(__VA_ARGS__) +#endif + +/* Used by vkd3d_unreachable(). */ +#ifdef VKD3D_CROSSTEST +#undef ERR +#define ERR(...) 
do { fprintf(stderr, __VA_ARGS__); abort(); } while (0) +#endif + #ifndef TRACE -#define TRACE VKD3D_DBG_LOG(TRACE) +#define TRACE VKD3D_DBG_LOG(TRACE) #endif #ifndef WARN -#define WARN VKD3D_DBG_LOG(WARN) +#define WARN VKD3D_DBG_LOG(WARN) #endif #ifndef FIXME -#define FIXME VKD3D_DBG_LOG(FIXME) +#define FIXME VKD3D_DBG_LOG(FIXME) #endif -#define ERR VKD3D_DBG_LOG(ERR) +#ifndef ERR +#define ERR VKD3D_DBG_LOG(ERR) +#endif + +#ifndef MESSAGE +#define MESSAGE VKD3D_DBG_LOG(MESSAGE) +#endif #ifndef TRACE_ON #define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) @@ -178,7 +216,9 @@ const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); #define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) #endif +#ifndef FIXME_ONCE #define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) +#endif #define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name @@ -233,7 +273,7 @@ static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) +#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h index 682d35c03c6..b157fc07cb7 100644 --- a/libs/vkd3d/include/private/vkd3d_memory.h +++ b/libs/vkd3d/include/private/vkd3d_memory.h @@ -19,7 +19,6 @@ #ifndef __VKD3D_MEMORY_H #define __VKD3D_MEMORY_H -#include #include #include #include @@ -44,7 +43,7 @@ static inline void *vkd3d_realloc(void *ptr, size_t size) static inline void *vkd3d_calloc(size_t count, size_t size) { void *ptr; - assert(count <= ~(size_t)0 / size); + VKD3D_ASSERT(!size || count <= ~(size_t)0 / size); if (!(ptr = calloc(count, size))) ERR("Out of memory.\n"); return ptr; diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index d3afcc11b16..d37d8ebad9e 100644 --- a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -105,6 
+105,11 @@ enum vkd3d_shader_structure_type * \since 1.10 */ VKD3D_SHADER_STRUCTURE_TYPE_SCAN_COMBINED_RESOURCE_SAMPLER_INFO, + /** + * The structure is a vkd3d_shader_parameter_info structure. + * \since 1.13 + */ + VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_STRUCTURE_TYPE), }; @@ -453,44 +458,191 @@ enum vkd3d_shader_binding_flag VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_BINDING_FLAG), }; +/** + * The manner in which a parameter value is provided to the shader, used in + * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. + */ enum vkd3d_shader_parameter_type { VKD3D_SHADER_PARAMETER_TYPE_UNKNOWN, + /** The parameter value is embedded directly in the shader. */ VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT, + /** + * The parameter value is provided to the shader via a specialization + * constant. This value is only supported for the SPIR-V target type. + */ VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT, + /** + * The parameter value is provided to the shader as part of a uniform + * buffer. + * + * \since 1.13 + */ + VKD3D_SHADER_PARAMETER_TYPE_BUFFER, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_TYPE), }; +/** + * The format of data provided to the shader, used in + * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. + */ enum vkd3d_shader_parameter_data_type { VKD3D_SHADER_PARAMETER_DATA_TYPE_UNKNOWN, + /** The parameter is provided as a 32-bit unsigned integer. */ VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32, + /** The parameter is provided as a 32-bit float. \since 1.13 */ + VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_DATA_TYPE), }; +/** + * Names a specific shader parameter, used in + * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. 
+ */ enum vkd3d_shader_parameter_name { VKD3D_SHADER_PARAMETER_NAME_UNKNOWN, + /** + * The sample count of the framebuffer, as returned by the HLSL function + * GetRenderTargetSampleCount() or the GLSL builtin gl_NumSamples. + * + * This parameter should be specified when compiling to SPIR-V, which + * provides no builtin ability to query this information from the shader. + * + * The default value is 1. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. + */ VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, + /** + * Alpha test comparison function. When this parameter is provided, if the + * alpha component of the pixel shader colour output at location 0 fails the + * test, as defined by this function and the reference value provided by + * VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, the fragment will be + * discarded. + * + * This parameter, along with VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, + * can be used to implement fixed function alpha test, as present in + * Direct3D versions up to 9, if the target environment does not support + * alpha test as part of its own fixed-function API (as Vulkan and core + * OpenGL). + * + * The default value is VKD3D_SHADER_COMPARISON_FUNC_ALWAYS. + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. The value specified must be + * a member of enum vkd3d_shader_comparison_func. + * + * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this + * version of vkd3d-shader. + * + * \since 1.13 + */ + VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC, + /** + * Alpha test reference value. + * See VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC for documentation of + * alpha test. + * + * The default value is zero. + * + * \since 1.13 + */ + VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, + /** + * Whether to use flat interpolation for fragment shader colour inputs. 
+ * If the value is nonzero, inputs whose semantic usage is COLOR will use + * flat interpolation instead of linear. + * This parameter is ignored if the shader model is 4 or greater, since only + * shader model 3 and below do not specify the interpolation mode in the + * shader bytecode. + * + * This parameter can be used to implement fixed function shade mode, as + * present in Direct3D versions up to 9, if the target environment does not + * support shade mode as part of its own fixed-function API (as Vulkan and + * core OpenGL). + * + * The data type for this parameter must be + * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. + * + * The default value is zero, i.e. use linear interpolation. + * + * Only VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT is supported in this + * version of vkd3d-shader. + * + * \since 1.13 + */ + VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARAMETER_NAME), }; +/** + * The value of an immediate constant parameter, used in + * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. + */ struct vkd3d_shader_parameter_immediate_constant { union { + /** + * The value if the parameter's data type is + * VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32. + */ uint32_t u32; + /** + * The value if the parameter's data type is + * VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32. + * + * \since 1.13 + */ + float f32; } u; }; +/** + * The linkage of a specialization constant parameter, used in + * struct vkd3d_shader_parameter and struct vkd3d_shader_parameter1. + */ struct vkd3d_shader_parameter_specialization_constant { + /** The ID of the specialization constant. */ uint32_t id; }; +/** + * The linkage of a parameter specified through a uniform buffer, used in + * struct vkd3d_shader_parameter1. + */ +struct vkd3d_shader_parameter_buffer +{ + /** + * The set of the uniform buffer descriptor. If the target environment does + * not support descriptor sets, this value must be set to 0. 
+ */ + unsigned int set; + /** The binding index of the uniform buffer descriptor. */ + unsigned int binding; + /** The byte offset of the parameter within the buffer. */ + uint32_t offset; +}; + +/** + * An individual shader parameter. + * + * This structure is an earlier version of struct vkd3d_shader_parameter1 + * which supports fewer parameter types; + * refer to that structure for usage information. + * + * Only the following types may be used with this structure: + * + * - VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT + * - VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT + */ struct vkd3d_shader_parameter { enum vkd3d_shader_parameter_name name; @@ -503,6 +655,56 @@ struct vkd3d_shader_parameter } u; }; +/** + * An individual shader parameter. + * + * This structure is used in struct vkd3d_shader_parameter_info; see there for + * explanation of shader parameters. + * + * For example, to specify the rasterizer sample count to the shader via an + * unsigned integer specialization constant with ID 3, + * set the following members: + * + * - \a name = VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT + * - \a type = VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT + * - \a data_type = VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32 + * - \a u.specialization_constant.id = 3 + * + * This structure is an extended version of struct vkd3d_shader_parameter. + */ +struct vkd3d_shader_parameter1 +{ + /** The builtin parameter to be mapped. */ + enum vkd3d_shader_parameter_name name; + /** How the parameter will be provided to the shader. */ + enum vkd3d_shader_parameter_type type; + /** + * The data type of the supplied parameter, which determines how it is to + * be interpreted. + */ + enum vkd3d_shader_parameter_data_type data_type; + union + { + /** + * Additional information if \a type is + * VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT. 
+ */ + struct vkd3d_shader_parameter_immediate_constant immediate_constant; + /** + * Additional information if \a type is + * VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT. + */ + struct vkd3d_shader_parameter_specialization_constant specialization_constant; + /** + * Additional information if \a type is + * VKD3D_SHADER_PARAMETER_TYPE_BUFFER. + */ + struct vkd3d_shader_parameter_buffer buffer; + void *_pointer_pad; + uint32_t _pad[4]; + } u; +}; + /** * Symbolic register indices for mapping uniform constant register sets in * legacy Direct3D bytecode to constant buffer views in the target environment. @@ -1674,7 +1876,7 @@ enum vkd3d_shader_sysval_semantic VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX = 0x05, /** Vertex ID; SV_VertexID in Direct3D. */ VKD3D_SHADER_SV_VERTEX_ID = 0x06, - /** Primtive ID; SV_PrimitiveID in Direct3D. */ + /** Primitive ID; SV_PrimitiveID in Direct3D. */ VKD3D_SHADER_SV_PRIMITIVE_ID = 0x07, /** Instance ID; SV_InstanceID in Direct3D. */ VKD3D_SHADER_SV_INSTANCE_ID = 0x08, @@ -1994,6 +2196,44 @@ struct vkd3d_shader_varying_map_info unsigned int varying_count; }; +/** + * Interface information regarding a builtin shader parameter. + * + * Like compile options specified with struct vkd3d_shader_compile_option, + * parameters are used to specify certain values which are not part of the + * source shader bytecode but which need to be specified in the shader bytecode + * in the target format. + * Unlike struct vkd3d_shader_compile_option, however, this structure allows + * parameters to be specified in a variety of different ways, as described by + * enum vkd3d_shader_parameter_type. + * + * This structure is an extended version of struct vkd3d_shader_parameter as + * used in struct vkd3d_shader_spirv_target_info, which allows more parameter + * types to be used, and also allows specifying parameters when compiling + * shaders to target types other than SPIR-V. 
If this structure is chained + * along with vkd3d_shader_spirv_target_info, any parameters specified in the + * latter structure are ignored. + * + * This structure is passed to vkd3d_shader_compile() and extends + * vkd3d_shader_compile_info. + * + * This structure contains only input parameters. + * + * \since 1.13 + */ +struct vkd3d_shader_parameter_info +{ + /** Must be set to VKD3D_SHADER_STRUCTURE_TYPE_PARAMETER_INFO. */ + enum vkd3d_shader_structure_type type; + /** Optional pointer to a structure containing further parameters. */ + const void *next; + + /** Pointer to an array of dynamic parameters for this shader instance. */ + const struct vkd3d_shader_parameter1 *parameters; + /** Size, in elements, of \ref parameters. */ + unsigned int parameter_count; +}; + #ifdef LIBVKD3D_SHADER_SOURCE # define VKD3D_SHADER_API VKD3D_EXPORT #else @@ -2077,6 +2317,7 @@ VKD3D_SHADER_API const enum vkd3d_shader_target_type *vkd3d_shader_get_supported * - vkd3d_shader_descriptor_offset_info * - vkd3d_shader_hlsl_source_info * - vkd3d_shader_interface_info + * - vkd3d_shader_parameter_info * - vkd3d_shader_preprocess_info * - vkd3d_shader_scan_combined_resource_sampler_info * - vkd3d_shader_scan_descriptor_info diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index f60ef7db769..c2c6ad67804 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -20,6 +20,7 @@ #define WIDL_C_INLINE_WRAPPERS #endif #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index 4523fc997ef..9a92f0ead02 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -22,7 +22,6 @@ #include "vkd3d_common.h" -#include #include #include #include @@ -45,11 +44,12 @@ extern const char *const vkd3d_dbg_env_name; static const char *const debug_level_names[] = { - 
[VKD3D_DBG_LEVEL_NONE ] = "none", - [VKD3D_DBG_LEVEL_ERR ] = "err", - [VKD3D_DBG_LEVEL_FIXME] = "fixme", - [VKD3D_DBG_LEVEL_WARN ] = "warn", - [VKD3D_DBG_LEVEL_TRACE] = "trace", + [VKD3D_DBG_LEVEL_NONE ] = "none", + [VKD3D_DBG_LEVEL_MESSAGE] = "message", + [VKD3D_DBG_LEVEL_ERR ] = "err", + [VKD3D_DBG_LEVEL_FIXME] = "fixme", + [VKD3D_DBG_LEVEL_WARN ] = "warn", + [VKD3D_DBG_LEVEL_TRACE] = "trace", }; enum vkd3d_dbg_level vkd3d_dbg_get_level(void) @@ -104,8 +104,6 @@ void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const ch if (vkd3d_dbg_get_level() < level) return; - assert(level < ARRAY_SIZE(debug_level_names)); - #ifdef _WIN32 vkd3d_dbg_output("vkd3d:%04lx:%s:%s ", GetCurrentThreadId(), debug_level_names[level], function); #elif HAVE_GETTID diff --git a/libs/vkd3d/libs/vkd3d-shader/checksum.c b/libs/vkd3d/libs/vkd3d-shader/checksum.c index 0910729a0e9..d9560628c77 100644 --- a/libs/vkd3d/libs/vkd3d-shader/checksum.c +++ b/libs/vkd3d/libs/vkd3d-shader/checksum.c @@ -288,7 +288,7 @@ void vkd3d_compute_dxbc_checksum(const void *dxbc, size_t size, uint32_t checksu const uint8_t *ptr = dxbc; struct md5_ctx ctx; - assert(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); + VKD3D_ASSERT(size > DXBC_CHECKSUM_SKIP_BYTE_COUNT); ptr += DXBC_CHECKSUM_SKIP_BYTE_COUNT; size -= DXBC_CHECKSUM_SKIP_BYTE_COUNT; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 9abc2c4db70..77e9711300f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -254,6 +254,10 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_PHASE ] = "phase", [VKD3DSIH_PHI ] = "phi", [VKD3DSIH_POW ] = "pow", + [VKD3DSIH_QUAD_READ_ACROSS_D ] = "quad_read_across_d", + [VKD3DSIH_QUAD_READ_ACROSS_X ] = "quad_read_across_x", + [VKD3DSIH_QUAD_READ_ACROSS_Y ] = "quad_read_across_y", + [VKD3DSIH_QUAD_READ_LANE_AT ] = "quad_read_lane_at", [VKD3DSIH_RCP ] = "rcp", [VKD3DSIH_REP ] = "rep", [VKD3DSIH_RESINFO ] = 
"resinfo", @@ -1199,7 +1203,7 @@ static void shader_print_register(struct vkd3d_d3d_asm_compiler *compiler, const { bool untyped = false; - switch (compiler->current->handler_idx) + switch (compiler->current->opcode) { case VKD3DSIH_MOV: case VKD3DSIH_MOVC: @@ -1755,7 +1759,7 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile { struct vkd3d_string_buffer *buffer = &compiler->buffer; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_BREAKP: case VKD3DSIH_CONTINUEP: @@ -1853,8 +1857,13 @@ static void shader_dump_instruction_flags(struct vkd3d_d3d_asm_compiler *compile break; case VKD3DSIH_TEX: - if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0) && (ins->flags & VKD3DSI_TEXLD_PROJECT)) - vkd3d_string_buffer_printf(buffer, "p"); + if (vkd3d_shader_ver_ge(&compiler->shader_version, 2, 0)) + { + if (ins->flags & VKD3DSI_TEXLD_PROJECT) + vkd3d_string_buffer_printf(buffer, "p"); + else if (ins->flags & VKD3DSI_TEXLD_BIAS) + vkd3d_string_buffer_printf(buffer, "b"); + } break; case VKD3DSIH_WAVE_OP_ADD: @@ -1910,7 +1919,7 @@ static void shader_dump_icb(struct vkd3d_d3d_asm_compiler *compiler, } else { - assert(icb->component_count == VKD3D_VEC4_SIZE); + VKD3D_ASSERT(icb->component_count == VKD3D_VEC4_SIZE); for (i = 0; i < icb->element_count; ++i) { shader_print_hex_literal(compiler, " {", icb->data[4 * i + 0], ""); @@ -1937,9 +1946,9 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, if (ins->coissue) vkd3d_string_buffer_printf(buffer, "+"); - shader_print_opcode(compiler, ins->handler_idx); + shader_print_opcode(compiler, ins->opcode); - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL: case VKD3DSIH_DCL_UAV_TYPED: @@ -2242,7 +2251,7 @@ static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; /* SV_Coverage has name vCoverage 
when used as an input, - * but it doens't appear in the signature in that case. */ + * but it doesn't appear in the signature in that case. */ case VKD3D_SHADER_SV_COVERAGE: return "oMask"; case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; default: return "??"; @@ -2430,7 +2439,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_ELSE: case VKD3DSIH_ENDIF: @@ -2459,7 +2468,7 @@ enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, shader_dump_instruction(&compiler, ins); - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_ELSE: case VKD3DSIH_IF: diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index bfd5b52b436..d05394c3ab7 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -757,7 +757,7 @@ static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1, { /* d3d shaders have a maximum of 8192 constants; we should not overrun * this array. 
*/ - assert((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); + VKD3D_ASSERT((index / 32) <= ARRAY_SIZE(sm1->constants[set].def_mask)); bitmap_set(sm1->constants[set].def_mask, index); } } @@ -1060,7 +1060,7 @@ static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1) static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins) { - if ((ins->handler_idx == VKD3DSIH_BREAKP || ins->handler_idx == VKD3DSIH_IF) && ins->flags) + if ((ins->opcode == VKD3DSIH_BREAKP || ins->opcode == VKD3DSIH_IF) && ins->flags) { vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS, "Ignoring unexpected instruction flags %#x.", ins->flags); @@ -1142,23 +1142,23 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str goto fail; } - if (ins->handler_idx == VKD3DSIH_DCL) + if (ins->opcode == VKD3DSIH_DCL) { shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic); } - else if (ins->handler_idx == VKD3DSIH_DEF) + else if (ins->opcode == VKD3DSIH_DEF) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_FLOAT); shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } - else if (ins->handler_idx == VKD3DSIH_DEFB) + else if (ins->opcode == VKD3DSIH_DEFB) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_SCALAR, VKD3D_DATA_UINT); shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true); } - else if (ins->handler_idx == VKD3DSIH_DEFI) + else if (ins->opcode == VKD3DSIH_DEFI) { shader_sm1_read_dst_param(sm1, &p, dst_param); shader_sm1_read_immconst(sm1, &p, &src_params[0], VSIR_DIMENSION_VEC4, VKD3D_DATA_INT); @@ -1195,7 +1195,7 @@ static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, str return; fail: - ins->handler_idx = VKD3DSIH_INVALID; + 
ins->opcode = VKD3DSIH_INVALID; *ptr = sm1->end; } @@ -1272,7 +1272,7 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, st sm1->end = &code[token_count]; /* Estimate instruction count to avoid reallocation in most shaders. */ - if (!vsir_program_init(program, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) + if (!vsir_program_init(program, compile_info, &version, code_size != ~(size_t)0 ? token_count / 4u + 4 : 16)) return VKD3D_ERROR_OUT_OF_MEMORY; vkd3d_shader_parser_init(&sm1->p, program, message_context, compile_info->source_name); @@ -1326,7 +1326,7 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c ins = &instructions->elements[instructions->count]; shader_sm1_read_instruction(&sm1, ins); - if (ins->handler_idx == VKD3DSIH_INVALID) + if (ins->opcode == VKD3DSIH_INVALID) { WARN("Encountered unrecognized or invalid instruction.\n"); vsir_program_cleanup(program); @@ -1354,8 +1354,8 @@ int d3dbc_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t c return ret; } -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg) +bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg) { unsigned int i; @@ -1365,56 +1365,56 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem bool output; enum vkd3d_shader_type shader_type; unsigned int major_version; - D3DSHADER_PARAM_REGISTER_TYPE type; + enum vkd3d_shader_register_type type; unsigned int offset; } register_table[] = { - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"depth", true, 
VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT}, - {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT}, - {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE}, - - {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT}, - {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT}, - {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE}, - {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT}, - - {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT}, - {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG}, - {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, - {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION}, - {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 1, VKD3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, 
VKD3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_COLOROUT}, + {"color", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_INPUT}, + {"texcoord", false, VKD3D_SHADER_TYPE_PIXEL, 2, VKD3DSPR_TEXTURE}, + + {"color", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, + {"depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, + {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_DEPTHOUT}, + {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_COLOROUT}, + {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, + {"vface", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_FACE}, + {"vpos", false, VKD3D_SHADER_TYPE_PIXEL, 3, VKD3DSPR_MISCTYPE, D3DSMO_POSITION}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 1, VKD3DSPR_TEXCRDOUT}, + + {"color", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_ATTROUT}, + {"fog", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_FOG}, + {"position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"psize", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POINT_SIZE}, + {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_RASTOUT, D3DSRO_POSITION}, + {"texcoord", true, VKD3D_SHADER_TYPE_VERTEX, 2, VKD3DSPR_TEXCRDOUT}, }; for (i = 0; i < ARRAY_SIZE(register_table); ++i) { - if (!ascii_strcasecmp(semantic->name, register_table[i].semantic) + if (!ascii_strcasecmp(semantic_name, register_table[i].semantic) && output == register_table[i].output - && 
ctx->profile->type == register_table[i].shader_type - && ctx->profile->major_version == register_table[i].major_version) + && version->type == register_table[i].shader_type + && version->major == register_table[i].major_version) { *type = register_table[i].type; - if (register_table[i].type == D3DSPR_MISCTYPE || register_table[i].type == D3DSPR_RASTOUT) + if (register_table[i].type == VKD3DSPR_MISCTYPE || register_table[i].type == VKD3DSPR_RASTOUT) *reg = register_table[i].offset; else - *reg = semantic->index; + *reg = semantic_index; return true; } } @@ -1422,7 +1422,8 @@ bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem return false; } -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx) +bool hlsl_sm1_usage_from_semantic(const char *semantic_name, + uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx) { static const struct { @@ -1454,10 +1455,10 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU for (i = 0; i < ARRAY_SIZE(semantics); ++i) { - if (!ascii_strcasecmp(semantic->name, semantics[i].name)) + if (!ascii_strcasecmp(semantic_name, semantics[i].name)) { *usage = semantics[i].usage; - *usage_idx = semantic->index; + *usage_idx = semantic_index; return true; } } @@ -1465,6 +1466,17 @@ bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLU return false; } +struct d3dbc_compiler +{ + struct vsir_program *program; + struct vkd3d_bytecode_buffer buffer; + struct vkd3d_shader_message_context *message_context; + + /* OBJECTIVE: Store all the required information in the other fields so + * that this hlsl_ctx is no longer necessary. 
*/ + struct hlsl_ctx *ctx; +}; + static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor) { if (type == VKD3D_SHADER_TYPE_VERTEX) @@ -1480,7 +1492,7 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) case HLSL_CLASS_ARRAY: return hlsl_sm1_class(type->e.array.type); case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) return D3DXPC_MATRIX_COLUMNS; else @@ -1497,13 +1509,22 @@ D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) case HLSL_CLASS_TEXTURE: case HLSL_CLASS_VERTEX_SHADER: return D3DXPC_OBJECT; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_UAV: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: break; } @@ -1593,13 +1614,22 @@ D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) case HLSL_CLASS_VERTEX_SHADER: return D3DXPT_VERTEXSHADER; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_UAV: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: break; } @@ -1677,8 +1707,7 @@ static void sm1_sort_externs(struct hlsl_ctx *ctx) list_move_tail(&ctx->extern_vars, &sorted); } 
-static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - struct hlsl_ir_function_decl *entry_func) +void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) { size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset; unsigned int uniform_count = 0; @@ -1739,11 +1768,11 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe } else { - put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id)); + put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index)); put_u32(buffer, var->bind_count[r]); } put_u32(buffer, 0); /* type */ - put_u32(buffer, 0); /* FIXME: default value */ + put_u32(buffer, 0); /* default value */ } } @@ -1767,6 +1796,62 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe write_sm1_type(buffer, var->data_type, ctab_start); set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start); + + if (var->default_values) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + unsigned int k; + + default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t)); + set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start); + + for (k = 0; k < comp_count; ++k) + { + struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); + unsigned int comp_offset; + enum hlsl_regset regset; + + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); + if (regset == HLSL_REGSET_NUMERIC) + { + union + { + uint32_t u; + float f; + } uni; + + switch (comp_type->e.numeric.type) + { + case HLSL_TYPE_DOUBLE: + hlsl_fixme(ctx, &var->loc, "Write double default values."); + uni.u = 0; + break; + + case HLSL_TYPE_INT: + uni.f = var->default_values[k].number.i; + 
break; + + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + uni.f = var->default_values[k].number.u; + break; + + case HLSL_TYPE_HALF: + case HLSL_TYPE_FLOAT: + uni.u = var->default_values[k].number.u; + break; + + default: + vkd3d_unreachable(); + } + + set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u); + } + } + } + ++uniform_count; } } @@ -1778,7 +1863,7 @@ static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t))); } -static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type) +static uint32_t sm1_encode_register_type(enum vkd3d_shader_register_type type) { return ((type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK) | ((type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2); @@ -1791,7 +1876,7 @@ struct sm1_instruction struct sm1_dst_register { - D3DSHADER_PARAM_REGISTER_TYPE type; + enum vkd3d_shader_register_type type; D3DSHADER_PARAM_DSTMOD_TYPE mod; unsigned int writemask; uint32_t reg; @@ -1799,19 +1884,45 @@ struct sm1_instruction struct sm1_src_register { - D3DSHADER_PARAM_REGISTER_TYPE type; + enum vkd3d_shader_register_type type; D3DSHADER_PARAM_SRCMOD_TYPE mod; unsigned int swizzle; uint32_t reg; - } srcs[3]; + } srcs[4]; unsigned int src_count; unsigned int has_dst; }; +static bool is_inconsequential_instr(const struct sm1_instruction *instr) +{ + const struct sm1_src_register *src = &instr->srcs[0]; + const struct sm1_dst_register *dst = &instr->dst; + unsigned int i; + + if (instr->opcode != D3DSIO_MOV) + return false; + if (dst->mod != D3DSPDM_NONE) + return false; + if (src->mod != D3DSPSM_NONE) + return false; + if (src->type != dst->type) + return false; + if (src->reg != dst->reg) + return false; + + for (i = 0; i < 4; ++i) + { + if ((dst->writemask & (1 << i)) && (vsir_swizzle_get_component(src->swizzle, i) != i)) + return false; + } + + return true; +} + static void 
write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg) { - assert(reg->writemask); + VKD3D_ASSERT(reg->writemask); put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->writemask << 16) | reg->reg); } @@ -1821,15 +1932,19 @@ static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer, put_u32(buffer, (1u << 31) | sm1_encode_register_type(reg->type) | reg->mod | (reg->swizzle << 16) | reg->reg); } -static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct sm1_instruction *instr) +static void d3dbc_write_instruction(struct d3dbc_compiler *d3dbc, const struct sm1_instruction *instr) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; uint32_t token = instr->opcode; unsigned int i; + if (is_inconsequential_instr(instr)) + return; + token |= VKD3D_SM1_INSTRUCTION_FLAGS_MASK & (instr->flags << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT); - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -1845,54 +1960,53 @@ static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_w src->swizzle = hlsl_map_swizzle(src->swizzle, map_writemask); } -static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2, - const struct hlsl_reg *src3) +static void d3dbc_write_dp2add(struct d3dbc_compiler *d3dbc, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) { struct sm1_instruction instr = { .opcode = D3DSIO_DP2ADD, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + 
.srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, + .srcs[2].type = VKD3DSPR_TEMP, .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), .srcs[2].reg = src3->id, .src_count = 3, }; - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, const struct hlsl_reg *src1, - const struct hlsl_reg *src2, const struct hlsl_reg *src3) +static void d3dbc_write_ternary_op(struct d3dbc_compiler *d3dbc, + D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, + const struct hlsl_reg *src1, const struct hlsl_reg *src2, const struct hlsl_reg *src3) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, - .srcs[2].type = D3DSPR_TEMP, + .srcs[2].type = VKD3DSPR_TEMP, .srcs[2].swizzle = hlsl_swizzle_from_writemask(src3->writemask), .srcs[2].reg = src3->id, .src_count = 3, @@ -1901,26 +2015,25 @@ static void write_sm1_ternary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buf sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); sm1_map_src_swizzle(&instr.srcs[2], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); 
+ d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) +static void d3dbc_write_binary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), .srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, .src_count = 2, @@ -1928,49 +2041,48 @@ static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buff sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); sm1_map_src_swizzle(&instr.srcs[1], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src1, const struct hlsl_reg *src2) +static void d3dbc_write_dot(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, + const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src1->writemask), 
.srcs[0].reg = src1->id, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(src2->writemask), .srcs[1].reg = src2->id, .src_count = 2, }; - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst, - const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) +static void d3dbc_write_unary_op(struct d3dbc_compiler *d3dbc, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, + const struct hlsl_reg *dst, const struct hlsl_reg *src, + D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod) { struct sm1_instruction instr = { .opcode = opcode, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.mod = dst_mod, .dst.writemask = dst->writemask, .dst.reg = dst->id, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), .srcs[0].reg = src->id, .srcs[0].mod = src_mod, @@ -1978,19 +2090,19 @@ static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffe }; sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &instr); + d3dbc_write_instruction(d3dbc, &instr); } -static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_cast(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); const struct hlsl_ir_node *arg1 = expr->operands[0].node; const struct hlsl_type *dst_type = expr->node.data_type; const struct hlsl_type *src_type = arg1->data_type; + struct hlsl_ctx *ctx = d3dbc->ctx; /* Narrowing casts were already lowered. 
*/ - assert(src_type->dimx == dst_type->dimx); + VKD3D_ASSERT(src_type->dimx == dst_type->dimx); switch (dst_type->e.numeric.type) { @@ -2004,7 +2116,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b /* Integrals are internally represented as floats, so no change is necessary.*/ case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_TYPE_DOUBLE: @@ -2028,7 +2140,7 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b break; case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_TYPE_BOOL: @@ -2057,8 +2169,11 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } } -static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_constant_defs(struct d3dbc_compiler *d3dbc) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; + struct hlsl_ctx *ctx = d3dbc->ctx; unsigned int i, x; for (i = 0; i < ctx->constant_defs.count; ++i) @@ -2067,12 +2182,12 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ uint32_t token = D3DSIO_DEF; const struct sm1_dst_register reg = { - .type = D3DSPR_CONST, + .type = VKD3DSPR_CONST, .writemask = VKD3DSP_WRITEMASK_ALL, .reg = constant_reg->index, }; - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= 5 << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -2082,32 +2197,32 @@ static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ } } -static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer 
*buffer, - const struct hlsl_ir_var *var, bool output) +static void d3dbc_write_semantic_dcl(struct d3dbc_compiler *d3dbc, + const struct signature_element *element, bool output) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; struct sm1_dst_register reg = {0}; uint32_t token, usage_idx; D3DDECLUSAGE usage; bool ret; - if ((!output && !var->last_read) || (output && !var->first_write)) - return; - - if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, ®.type, ®.reg)) + if (hlsl_sm1_register_from_semantic(version, element->semantic_name, + element->semantic_index, output, ®.type, ®.reg)) { usage = 0; usage_idx = 0; } else { - ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx); - assert(ret); - reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT; - reg.reg = var->regs[HLSL_REGSET_NUMERIC].id; + ret = hlsl_sm1_usage_from_semantic(element->semantic_name, element->semantic_index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + reg.type = output ? 
VKD3DSPR_OUTPUT : VKD3DSPR_INPUT; + reg.reg = element->register_index; } token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= 2 << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -2116,39 +2231,47 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT; put_u32(buffer, token); - reg.writemask = (1 << var->data_type->dimx) - 1; + reg.writemask = element->mask; write_sm1_dst_register(buffer, ®); } -static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_semantic_dcls(struct d3dbc_compiler *d3dbc) { + struct vsir_program *program = d3dbc->program; + const struct vkd3d_shader_version *version; bool write_in = false, write_out = false; - struct hlsl_ir_var *var; - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version >= 2) + version = &program->shader_version; + if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major >= 2) write_in = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version == 3) + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major == 3) write_in = write_out = true; - else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && ctx->profile->major_version < 3) + else if (version->type == VKD3D_SHADER_TYPE_VERTEX && version->major < 3) write_in = true; - LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + if (write_in) { - if (write_in && var->is_input_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, false); - if (write_out && var->is_output_semantic) - write_sm1_semantic_dcl(ctx, buffer, var, true); + for (unsigned int i = 0; i < program->input_signature.element_count; ++i) + d3dbc_write_semantic_dcl(d3dbc, &program->input_signature.elements[i], false); + } + + if (write_out) + { + for (unsigned int i = 0; i < program->output_signature.element_count; 
++i) + d3dbc_write_semantic_dcl(d3dbc, &program->output_signature.elements[i], true); } } -static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +static void d3dbc_write_sampler_dcl(struct d3dbc_compiler *d3dbc, unsigned int reg_id, enum hlsl_sampler_dim sampler_dim) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct vkd3d_bytecode_buffer *buffer = &d3dbc->buffer; struct sm1_dst_register reg = {0}; uint32_t token, res_type = 0; token = D3DSIO_DCL; - if (ctx->profile->major_version > 1) + if (version->major > 1) token |= 2 << D3DSI_INSTLENGTH_SHIFT; put_u32(buffer, token); @@ -2175,20 +2298,22 @@ static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_bu token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT; put_u32(buffer, token); - reg.type = D3DSPR_SAMPLER; + reg.type = VKD3DSPR_COMBINED_SAMPLER; reg.writemask = VKD3DSP_WRITEMASK_ALL; reg.reg = reg_id; write_sm1_dst_register(buffer, ®); } -static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer) +static void d3dbc_write_sampler_dcls(struct d3dbc_compiler *d3dbc) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; + struct hlsl_ctx *ctx = d3dbc->ctx; enum hlsl_sampler_dim sampler_dim; unsigned int i, count, reg_id; struct hlsl_ir_var *var; - if (ctx->profile->major_version < 2) + if (version->major < 2) return; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) @@ -2210,39 +2335,38 @@ static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b continue; } - reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i; - write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim); + reg_id = var->regs[HLSL_REGSET_SAMPLERS].index + i; + d3dbc_write_sampler_dcl(d3dbc, reg_id, sampler_dim); } } } } -static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) 
+static void d3dbc_write_constant(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_CONST, + .srcs[0].type = VKD3DSPR_CONST, .srcs[0].reg = constant->reg.id, .srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask), .src_count = 1, }; - assert(instr->reg.allocated); - assert(constant->reg.allocated); + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(constant->reg.allocated); sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, +static void d3dbc_write_per_component_unary_op(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode) { struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); @@ -2255,28 +2379,69 @@ static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_ src.writemask = hlsl_combine_writemasks(src.writemask, 1u << i); dst.writemask = hlsl_combine_writemasks(dst.writemask, 1u << i); - write_sm1_unary_op(ctx, buffer, opcode, &dst, &src, 0, 0); + d3dbc_write_unary_op(d3dbc, opcode, &dst, &src, 0, 0); } } -static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void d3dbc_write_sincos(struct d3dbc_compiler *d3dbc, enum hlsl_ir_expr_op op, + const struct hlsl_reg *dst, const struct hlsl_reg *src) { + struct sm1_instruction instr = + { + .opcode = D3DSIO_SINCOS, + + .dst.type = VKD3DSPR_TEMP, + .dst.writemask = dst->writemask, + .dst.reg = dst->id, + .has_dst = 1, + + .srcs[0].type = 
VKD3DSPR_TEMP, + .srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask), + .srcs[0].reg = src->id, + .src_count = 1, + }; + + if (op == HLSL_OP1_COS_REDUCED) + VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_0); + else /* HLSL_OP1_SIN_REDUCED */ + VKD3D_ASSERT(dst->writemask == VKD3DSP_WRITEMASK_1); + + if (d3dbc->ctx->profile->major_version < 3) + { + instr.src_count = 3; + + instr.srcs[1].type = VKD3DSPR_CONST; + instr.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); + instr.srcs[1].reg = d3dbc->ctx->d3dsincosconst1.id; + + instr.srcs[2].type = VKD3DSPR_CONST; + instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL); + instr.srcs[2].reg = d3dbc->ctx->d3dsincosconst2.id; + } + + d3dbc_write_instruction(d3dbc, &instr); +} + +static void d3dbc_write_expr(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) +{ + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; struct hlsl_ir_expr *expr = hlsl_ir_expr(instr); struct hlsl_ir_node *arg1 = expr->operands[0].node; struct hlsl_ir_node *arg2 = expr->operands[1].node; struct hlsl_ir_node *arg3 = expr->operands[2].node; + struct hlsl_ctx *ctx = d3dbc->ctx; - assert(instr->reg.allocated); + VKD3D_ASSERT(instr->reg.allocated); if (expr->op == HLSL_OP1_REINTERPRET) { - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); return; } if (expr->op == HLSL_OP1_CAST) { - write_sm1_cast(ctx, buffer, instr); + d3dbc_write_cast(d3dbc, instr); return; } @@ -2290,70 +2455,75 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b switch (expr->op) { case HLSL_OP1_ABS: - write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_OP1_DSX: - write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); + 
d3dbc_write_unary_op(d3dbc, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_OP1_DSY: - write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0); break; case HLSL_OP1_EXP2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_EXP); break; case HLSL_OP1_LOG2: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_LOG); break; case HLSL_OP1_NEG: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0); break; case HLSL_OP1_SAT: - write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); + d3dbc_write_unary_op(d3dbc, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE); break; case HLSL_OP1_RCP: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RCP); break; case HLSL_OP1_RSQ: - write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ); + d3dbc_write_per_component_unary_op(d3dbc, instr, D3DSIO_RSQ); + break; + + case HLSL_OP1_COS_REDUCED: + case HLSL_OP1_SIN_REDUCED: + d3dbc_write_sincos(d3dbc, expr->op, &instr->reg, &arg1->reg); break; case HLSL_OP2_ADD: - write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_MAX: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_MIN: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); 
break; case HLSL_OP2_MUL: - write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP1_FRACT: - write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); + d3dbc_write_unary_op(d3dbc, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0); break; case HLSL_OP2_DOT: switch (arg1->data_type->dimx) { case 4: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_dot(d3dbc, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg); break; case 3: - write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_dot(d3dbc, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg); break; default: @@ -2362,27 +2532,31 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b break; case HLSL_OP2_LOGIC_AND: - write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_LOGIC_OR: - write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP2_SLT: - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (version->type == VKD3D_SHADER_TYPE_PIXEL) hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); - write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + d3dbc_write_binary_op(d3dbc, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); break; case HLSL_OP3_CMP: - if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + if (version->type == VKD3D_SHADER_TYPE_VERTEX) hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); - write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + 
d3dbc_write_ternary_op(d3dbc, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; case HLSL_OP3_DP2ADD: - write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + d3dbc_write_dp2add(d3dbc, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); + break; + + case HLSL_OP3_MAD: + d3dbc_write_ternary_op(d3dbc, D3DSIO_MAD, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; default: @@ -2391,50 +2565,49 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } } -static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block); +static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block); -static void write_sm1_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void d3dbc_write_if(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_if *iff = hlsl_ir_if(instr); const struct hlsl_ir_node *condition; struct sm1_instruction sm1_ifc, sm1_else, sm1_endif; condition = iff->condition.node; - assert(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); + VKD3D_ASSERT(condition->data_type->dimx == 1 && condition->data_type->dimy == 1); sm1_ifc = (struct sm1_instruction) { .opcode = D3DSIO_IFC, .flags = VKD3D_SHADER_REL_OP_NE, /* Make it a "if_ne" instruction. 
*/ - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), .srcs[0].reg = condition->reg.id, .srcs[0].mod = 0, - .srcs[1].type = D3DSPR_TEMP, + .srcs[1].type = VKD3DSPR_TEMP, .srcs[1].swizzle = hlsl_swizzle_from_writemask(condition->reg.writemask), .srcs[1].reg = condition->reg.id, .srcs[1].mod = D3DSPSM_NEG, .src_count = 2, }; - write_sm1_instruction(ctx, buffer, &sm1_ifc); - write_sm1_block(ctx, buffer, &iff->then_block); + d3dbc_write_instruction(d3dbc, &sm1_ifc); + d3dbc_write_block(d3dbc, &iff->then_block); if (!list_empty(&iff->else_block.instrs)) { sm1_else = (struct sm1_instruction){.opcode = D3DSIO_ELSE}; - write_sm1_instruction(ctx, buffer, &sm1_else); - write_sm1_block(ctx, buffer, &iff->else_block); + d3dbc_write_instruction(d3dbc, &sm1_else); + d3dbc_write_block(d3dbc, &iff->else_block); } sm1_endif = (struct sm1_instruction){.opcode = D3DSIO_ENDIF}; - write_sm1_instruction(ctx, buffer, &sm1_endif); + d3dbc_write_instruction(d3dbc, &sm1_endif); } -static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr) +static void d3dbc_write_jump(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr); @@ -2448,54 +2621,55 @@ static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b { .opcode = D3DSIO_TEXKILL, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = reg->id, .dst.writemask = reg->writemask, .has_dst = 1, }; - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); break; } default: - hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); + hlsl_fixme(d3dbc->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type)); } } -static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct 
hlsl_ir_node *instr) +static void d3dbc_write_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_load *load = hlsl_ir_load(instr); + struct hlsl_ctx *ctx = d3dbc->ctx; const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src); struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = reg.id, .srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask), .src_count = 1, }; - assert(instr->reg.allocated); + VKD3D_ASSERT(instr->reg.allocated); if (load->src.var->is_uniform) { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_CONST; + VKD3D_ASSERT(reg.allocated); + sm1_instr.srcs[0].type = VKD3DSPR_CONST; } else if (load->src.var->is_input_semantic) { - if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic, - false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) + if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, load->src.var->semantic.name, + load->src.var->semantic.index, false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg)) { - assert(reg.allocated); - sm1_instr.srcs[0].type = D3DSPR_INPUT; + VKD3D_ASSERT(reg.allocated); + sm1_instr.srcs[0].type = VKD3DSPR_INPUT; sm1_instr.srcs[0].reg = reg.id; } else @@ -2503,32 +2677,34 @@ static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_resource_load(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_resource_load *load = 
hlsl_ir_resource_load(instr); struct hlsl_ir_node *coords = load->coords.node; + struct hlsl_ir_node *ddx = load->ddx.node; + struct hlsl_ir_node *ddy = load->ddy.node; unsigned int sampler_offset, reg_id; + struct hlsl_ctx *ctx = d3dbc->ctx; struct sm1_instruction sm1_instr; sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource); - reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset; + reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset; sm1_instr = (struct sm1_instruction) { - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = coords->reg.id, - .srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), + .srcs[0].swizzle = hlsl_swizzle_from_writemask(coords->reg.writemask), - .srcs[1].type = D3DSPR_SAMPLER, + .srcs[1].type = VKD3DSPR_COMBINED_SAMPLER, .srcs[1].reg = reg_id, .srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL), @@ -2546,69 +2722,82 @@ static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_ sm1_instr.opcode |= VKD3DSI_TEXLD_PROJECT << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; break; + case HLSL_RESOURCE_SAMPLE_LOD_BIAS: + sm1_instr.opcode = D3DSIO_TEX; + sm1_instr.opcode |= VKD3DSI_TEXLD_BIAS << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT; + break; + + case HLSL_RESOURCE_SAMPLE_GRAD: + sm1_instr.opcode = D3DSIO_TEXLDD; + + sm1_instr.srcs[2].type = VKD3DSPR_TEMP; + sm1_instr.srcs[2].reg = ddx->reg.id; + sm1_instr.srcs[2].swizzle = hlsl_swizzle_from_writemask(ddx->reg.writemask); + + sm1_instr.srcs[3].type = VKD3DSPR_TEMP; + sm1_instr.srcs[3].reg = ddy->reg.id; + sm1_instr.srcs[3].swizzle = hlsl_swizzle_from_writemask(ddy->reg.writemask); + + sm1_instr.src_count += 2; + break; + default: hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type); return; } - 
assert(instr->reg.allocated); + VKD3D_ASSERT(instr->reg.allocated); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_store(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { + const struct vkd3d_shader_version *version = &d3dbc->program->shader_version; const struct hlsl_ir_store *store = hlsl_ir_store(instr); - const struct hlsl_ir_node *rhs = store->rhs.node; + struct hlsl_ctx *ctx = d3dbc->ctx; const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs); + const struct hlsl_ir_node *rhs = store->rhs.node; struct sm1_instruction sm1_instr = { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = reg.id, .dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask), .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = rhs->reg.id, .srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask), .src_count = 1, }; - if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) - { - hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks."); - return; - } - if (store->lhs.var->is_output_semantic) { - if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1) + if (version->type == VKD3D_SHADER_TYPE_PIXEL && version->major == 1) { - sm1_instr.dst.type = D3DSPR_TEMP; + sm1_instr.dst.type = VKD3DSPR_TEMP; sm1_instr.dst.reg = 0; } - else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic, - true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) + else if (!hlsl_sm1_register_from_semantic(&d3dbc->program->shader_version, store->lhs.var->semantic.name, + store->lhs.var->semantic.index, true, &sm1_instr.dst.type, &sm1_instr.dst.reg)) { - assert(reg.allocated); - sm1_instr.dst.type = D3DSPR_OUTPUT; + VKD3D_ASSERT(reg.allocated); + 
sm1_instr.dst.type = VKD3DSPR_OUTPUT; sm1_instr.dst.reg = reg.id; } else sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1; } else - assert(reg.allocated); + VKD3D_ASSERT(reg.allocated); sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_ir_node *instr) +static void d3dbc_write_swizzle(struct d3dbc_compiler *d3dbc, const struct hlsl_ir_node *instr) { const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr); const struct hlsl_ir_node *val = swizzle->val.node; @@ -2616,27 +2805,27 @@ static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer { .opcode = D3DSIO_MOV, - .dst.type = D3DSPR_TEMP, + .dst.type = VKD3DSPR_TEMP, .dst.reg = instr->reg.id, .dst.writemask = instr->reg.writemask, .has_dst = 1, - .srcs[0].type = D3DSPR_TEMP, + .srcs[0].type = VKD3DSPR_TEMP, .srcs[0].reg = val->reg.id, .srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask), swizzle->swizzle, instr->data_type->dimx), .src_count = 1, }; - assert(instr->reg.allocated); - assert(val->reg.allocated); + VKD3D_ASSERT(instr->reg.allocated); + VKD3D_ASSERT(val->reg.allocated); sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask); - write_sm1_instruction(ctx, buffer, &sm1_instr); + d3dbc_write_instruction(d3dbc, &sm1_instr); } -static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, - const struct hlsl_block *block) +static void d3dbc_write_block(struct d3dbc_compiler *d3dbc, const struct hlsl_block *block) { + struct hlsl_ctx *ctx = d3dbc->ctx; const struct hlsl_ir_node *instr; LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) @@ -2656,38 +2845,38 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * 
vkd3d_unreachable(); case HLSL_IR_CONSTANT: - write_sm1_constant(ctx, buffer, instr); + d3dbc_write_constant(d3dbc, instr); break; case HLSL_IR_EXPR: - write_sm1_expr(ctx, buffer, instr); + d3dbc_write_expr(d3dbc, instr); break; case HLSL_IR_IF: if (hlsl_version_ge(ctx, 2, 1)) - write_sm1_if(ctx, buffer, instr); + d3dbc_write_if(d3dbc, instr); else hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches."); break; case HLSL_IR_JUMP: - write_sm1_jump(ctx, buffer, instr); + d3dbc_write_jump(d3dbc, instr); break; case HLSL_IR_LOAD: - write_sm1_load(ctx, buffer, instr); + d3dbc_write_load(d3dbc, instr); break; case HLSL_IR_RESOURCE_LOAD: - write_sm1_resource_load(ctx, buffer, instr); + d3dbc_write_resource_load(d3dbc, instr); break; case HLSL_IR_STORE: - write_sm1_store(ctx, buffer, instr); + d3dbc_write_store(d3dbc, instr); break; case HLSL_IR_SWIZZLE: - write_sm1_swizzle(ctx, buffer, instr); + d3dbc_write_swizzle(d3dbc, instr); break; default: @@ -2696,32 +2885,45 @@ static void write_sm1_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * } } -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out) +/* OBJECTIVE: Stop relying on ctx and entry_func on this function, receiving + * data from the other parameters instead, so it can be removed as an argument + * and be declared in vkd3d_shader_private.h and used without relying on HLSL + * IR structs. 
*/ +int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { - struct vkd3d_bytecode_buffer buffer = {0}; + const struct vkd3d_shader_version *version = &program->shader_version; + struct d3dbc_compiler d3dbc = {0}; + struct vkd3d_bytecode_buffer *buffer = &d3dbc.buffer; + + d3dbc.ctx = ctx; + d3dbc.program = program; + d3dbc.message_context = message_context; - put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version)); + put_u32(buffer, sm1_version(version->type, version->major, version->minor)); - write_sm1_uniforms(ctx, &buffer, entry_func); + bytecode_put_bytes(buffer, ctab->code, ctab->size); - write_sm1_constant_defs(ctx, &buffer); - write_sm1_semantic_dcls(ctx, &buffer); - write_sm1_sampler_dcls(ctx, &buffer); - write_sm1_block(ctx, &buffer, &entry_func->body); + d3dbc_write_constant_defs(&d3dbc); + d3dbc_write_semantic_dcls(&d3dbc); + d3dbc_write_sampler_dcls(&d3dbc); + d3dbc_write_block(&d3dbc, &entry_func->body); - put_u32(&buffer, D3DSIO_END); + put_u32(buffer, D3DSIO_END); - if (buffer.status) - ctx->result = buffer.status; + if (buffer->status) + ctx->result = buffer->status; if (!ctx->result) { - out->code = buffer.data; - out->size = buffer.size; + out->code = buffer->data; + out->size = buffer->size; } else { - vkd3d_free(buffer.data); + vkd3d_free(buffer->data); } return ctx->result; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 4b9f67235aa..184788dc57e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -29,7 +29,7 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void { struct vkd3d_shader_dxbc_section_desc *section; - 
assert(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); + VKD3D_ASSERT(dxbc->section_count < ARRAY_SIZE(dxbc->sections)); section = &dxbc->sections[dxbc->section_count++]; section->tag = tag; @@ -983,7 +983,7 @@ static int shader_parse_root_signature(const struct vkd3d_shader_code *data, { struct vkd3d_shader_root_signature_desc1 *v_1_1 = &desc->u.v_1_1; - assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); + VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); v_1_1->parameter_count = count; if (v_1_1->parameter_count) @@ -1777,7 +1777,7 @@ int vkd3d_shader_convert_root_signature(struct vkd3d_shader_versioned_root_signa } else { - assert(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); + VKD3D_ASSERT(version == VKD3D_SHADER_ROOT_SIGNATURE_VERSION_1_1); ret = convert_root_signature_to_v1_1(dst, src); } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 73a8d8687c5..4a17c62292b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -458,6 +458,8 @@ enum dx_intrinsic_opcode DX_WAVE_ACTIVE_OP = 119, DX_WAVE_ACTIVE_BIT = 120, DX_WAVE_PREFIX_OP = 121, + DX_QUAD_READ_LANE_AT = 122, + DX_QUAD_OP = 123, DX_LEGACY_F32TOF16 = 130, DX_LEGACY_F16TOF32 = 131, DX_WAVE_ALL_BIT_COUNT = 135, @@ -576,6 +578,13 @@ enum dxil_wave_op_kind WAVE_OP_MAX = 3, }; +enum dxil_quad_op_kind +{ + QUAD_READ_ACROSS_X = 0, + QUAD_READ_ACROSS_Y = 1, + QUAD_READ_ACROSS_D = 2, +}; + struct sm6_pointer_info { const struct sm6_type *type; @@ -932,7 +941,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length if (!length) return 0; - assert(length < 32); + VKD3D_ASSERT(length < 32); if (sm6_parser_is_end(sm6)) { @@ -940,7 +949,7 @@ static uint32_t sm6_parser_read_bits(struct sm6_parser *sm6, unsigned int length return 0; } - assert(sm6->bitpos < 32); + VKD3D_ASSERT(sm6->bitpos < 32); bits = *sm6->ptr >> sm6->bitpos; l = 32 - sm6->bitpos; if (l <= length) @@ -1199,7 
+1208,7 @@ static enum vkd3d_result sm6_parser_add_global_abbrev(struct sm6_parser *sm6) struct dxil_global_abbrev *global_abbrev; enum vkd3d_result ret; - assert(block->id == BLOCKINFO_BLOCK); + VKD3D_ASSERT(block->id == BLOCKINFO_BLOCK); if (!vkd3d_array_reserve((void **)&sm6->abbrevs, &sm6->abbrev_capacity, sm6->abbrev_count + 1, sizeof(*sm6->abbrevs)) || !(global_abbrev = vkd3d_malloc(sizeof(*global_abbrev) + count * sizeof(global_abbrev->abbrev.operands[0])))) @@ -1468,7 +1477,7 @@ static enum vkd3d_result dxil_block_init(struct dxil_block *block, const struct if (sm6->abbrevs[i]->block_id == block->id) block->abbrevs[abbrev_count++] = &sm6->abbrevs[i]->abbrev; - assert(abbrev_count == block->abbrev_count); + VKD3D_ASSERT(abbrev_count == block->abbrev_count); } if ((ret = dxil_block_read(block, sm6)) < 0) @@ -1546,7 +1555,7 @@ static char *dxil_record_to_string(const struct dxil_record *record, unsigned in unsigned int i; char *str; - assert(offset <= record->operand_count); + VKD3D_ASSERT(offset <= record->operand_count); if (!(str = vkd3d_calloc(record->operand_count - offset + 1, 1))) { vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_OUT_OF_MEMORY, @@ -1834,7 +1843,7 @@ static enum vkd3d_result sm6_parser_type_table_init(struct sm6_parser *sm6) ++sm6->type_count; } - assert(sm6->type_count == type_count); + VKD3D_ASSERT(sm6->type_count == type_count); if (struct_name) { @@ -2207,13 +2216,13 @@ static inline bool sm6_value_is_function_dcl(const struct sm6_value *value) static inline bool sm6_value_is_dx_intrinsic_dcl(const struct sm6_value *fn) { - assert(sm6_value_is_function_dcl(fn)); + VKD3D_ASSERT(sm6_value_is_function_dcl(fn)); return fn->u.function.is_prototype && !strncmp(fn->u.function.name, "dx.op.", 6); } static inline struct sm6_value *sm6_parser_get_current_value(const struct sm6_parser *sm6) { - assert(sm6->value_count < sm6->value_capacity); + VKD3D_ASSERT(sm6->value_count < sm6->value_capacity); return 
&sm6->values[sm6->value_count]; } @@ -3395,7 +3404,7 @@ static struct vkd3d_shader_instruction *sm6_parser_add_instruction(struct sm6_pa enum vkd3d_shader_opcode handler_idx) { struct vkd3d_shader_instruction *ins = sm6_parser_require_space(sm6, 1); - assert(ins); + VKD3D_ASSERT(ins); vsir_instruction_init(ins, &sm6->p.location, handler_idx); ++sm6->p.program->instructions.count; return ins; @@ -3642,7 +3651,7 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init { const struct sm6_value *value; - assert(index); + VKD3D_ASSERT(index); --index; if (!(value = sm6_parser_get_value_safe(sm6, index)) || (!sm6_value_is_icb(value) && !sm6_value_is_undef(value))) { @@ -3755,21 +3764,21 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) for (i = 0; i < sm6->p.program->instructions.count; ++i) { ins = &sm6->p.program->instructions.elements[i]; - if (ins->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) + if (ins->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && ins->declaration.indexable_temp.initialiser) { ins->declaration.indexable_temp.initialiser = resolve_forward_initialiser( (uintptr_t)ins->declaration.indexable_temp.initialiser, sm6); } - else if (ins->handler_idx == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) + else if (ins->opcode == VKD3DSIH_DCL_IMMEDIATE_CONSTANT_BUFFER) { ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); } - else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) + else if (ins->opcode == VKD3DSIH_DCL_TGSM_RAW) { ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ins->flags = 0; } - else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) + else if (ins->opcode == VKD3DSIH_DCL_TGSM_STRUCTURED) { ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); ins->flags = 0; @@ -3886,7 +3895,7 @@ static void sm6_parser_init_signature(struct sm6_parser *sm6, 
const struct shade if (e->register_count > 1 || (is_patch_constant && vsir_sysval_semantic_is_tess_factor(e->sysval_semantic))) param->reg.idx[count++].offset = 0; - assert(count < ARRAY_SIZE(param->reg.idx)); + VKD3D_ASSERT(count < ARRAY_SIZE(param->reg.idx)); param->reg.idx[count++].offset = i; param->reg.idx_count = count; } @@ -4289,7 +4298,7 @@ static void sm6_parser_emit_binop(struct sm6_parser *sm6, const struct dxil_reco if (!(flags & FP_ALLOW_UNSAFE_ALGEBRA)) ins->flags |= VKD3DSI_PRECISE_X; flags &= ~FP_ALLOW_UNSAFE_ALGEBRA; - /* SPIR-V FPFastMathMode is only available in the Kernel executon model. */ + /* SPIR-V FPFastMathMode is only available in the Kernel execution model. */ silence_warning = !(flags & ~(FP_NO_NAN | FP_NO_INF | FP_NO_SIGNED_ZEROS | FP_ALLOW_RECIPROCAL)); break; case VKD3DSIH_IADD: @@ -4402,7 +4411,7 @@ static void sm6_parser_emit_br(struct sm6_parser *sm6, const struct dxil_record code_block->terminator.false_block = sm6_function_get_block(function, record->operands[1], sm6); } - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static bool sm6_parser_emit_reg_composite_construct(struct sm6_parser *sm6, const struct vkd3d_shader_register **operand_regs, @@ -4619,6 +4628,8 @@ static enum vkd3d_shader_opcode map_dx_binary_op(enum dx_intrinsic_opcode op, co return VKD3DSIH_IMAX; case DX_IMIN: return VKD3DSIH_IMIN; + case DX_QUAD_READ_LANE_AT: + return VKD3DSIH_QUAD_READ_LANE_AT; case DX_UMAX: return VKD3DSIH_UMAX; case DX_UMIN: @@ -4855,10 +4866,10 @@ static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intr return; src_param_init_vector_from_reg(src_param, &buffer->u.handle.reg); register_index_address_init(&src_param->reg.idx[2], operands[1], sm6); - assert(src_param->reg.idx_count == 3); + VKD3D_ASSERT(src_param->reg.idx_count == 3); type = sm6_type_get_scalar_type(dst->type, 0); - assert(type); + VKD3D_ASSERT(type); src_param->reg.data_type = vkd3d_data_type_from_sm6_type(type); if 
(data_type_is_64_bit(src_param->reg.data_type)) src_param->swizzle = vsir_swizzle_64_from_32(src_param->swizzle); @@ -4962,7 +4973,7 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int reg->non_uniform = !!sm6_value_get_constant_uint(operands[3]); /* NOP is used to flag no instruction emitted. */ - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_dx_stream(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, @@ -5200,7 +5211,7 @@ static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_in instruction_dst_param_init_temp_vector(ins++, sm6); state->temp_idx = 1; - /* DXIL does not have an instrinsic for sample info, and resinfo is expected to return + /* DXIL does not have an intrinsic for sample info, and resinfo is expected to return * the sample count in .w for MS textures. The result is always a struct of 4 x uint32. */ vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SAMPLE_INFO); ins->flags = VKD3DSI_SAMPLE_INFO_UINT; @@ -5331,7 +5342,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin if (!is_patch_constant && !operands[3]->is_undefined) { - assert(src_param->reg.idx_count > count); + VKD3D_ASSERT(src_param->reg.idx_count > count); register_index_address_init(&src_param->reg.idx[count], operands[3], sm6); } @@ -5370,6 +5381,47 @@ static void sm6_parser_emit_dx_primitive_id(struct sm6_parser *sm6, enum dx_intr sm6_parser_emit_dx_input_register_mov(sm6, state->ins, VKD3DSPR_PRIMID, VKD3D_DATA_UINT); } +static enum vkd3d_shader_opcode dx_map_quad_op(enum dxil_quad_op_kind op) +{ + switch (op) + { + case QUAD_READ_ACROSS_X: + return VKD3DSIH_QUAD_READ_ACROSS_X; + case QUAD_READ_ACROSS_Y: + return VKD3DSIH_QUAD_READ_ACROSS_Y; + case QUAD_READ_ACROSS_D: + return VKD3DSIH_QUAD_READ_ACROSS_D; + default: + return VKD3DSIH_INVALID; + } +} + +static void sm6_parser_emit_dx_quad_op(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + 
const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + enum vkd3d_shader_opcode opcode; + enum dxil_quad_op_kind quad_op; + + quad_op = sm6_value_get_constant_uint(operands[1]); + if ((opcode = dx_map_quad_op(quad_op)) == VKD3DSIH_INVALID) + { + FIXME("Unhandled quad op kind %u.\n", quad_op); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_UNHANDLED_INTRINSIC, + "Quad op kind %u is unhandled.", quad_op); + return; + } + + vsir_instruction_init(ins, &sm6->p.location, opcode); + + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_raw_buffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -6229,6 +6281,8 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_MAKE_DOUBLE ] = {"d", "ii", sm6_parser_emit_dx_make_double}, [DX_OUTPUT_CONTROL_POINT_ID ] = {"i", "", sm6_parser_emit_dx_output_control_point_id}, [DX_PRIMITIVE_ID ] = {"i", "", sm6_parser_emit_dx_primitive_id}, + [DX_QUAD_OP ] = {"n", "Rc", sm6_parser_emit_dx_quad_op}, + [DX_QUAD_READ_LANE_AT ] = {"n", "Ri", sm6_parser_emit_dx_binary}, [DX_RAW_BUFFER_LOAD ] = {"o", "Hii8i", sm6_parser_emit_dx_raw_buffer_load}, [DX_RAW_BUFFER_STORE ] = {"v", "Hiioooocc", sm6_parser_emit_dx_raw_buffer_store}, [DX_ROUND_NE ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -6346,7 +6400,7 @@ static bool sm6_parser_validate_dx_op(struct sm6_parser *sm6, enum dx_intrinsic_ info = &sm6_dx_op_table[op]; - assert(info->ret_type[0]); + VKD3D_ASSERT(info->ret_type[0]); if (!sm6_parser_validate_operand_type(sm6, dst, info->ret_type[0], NULL, true)) { WARN("Failed to validate return type for dx intrinsic id %u, '%s'.\n", op, name); @@ -6381,7 +6435,7 @@ 
static void sm6_parser_emit_unhandled(struct sm6_parser *sm6, struct vkd3d_shade { const struct sm6_type *type; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; if (!dst->type) return; @@ -6551,7 +6605,7 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ else if (to->u.width > from->u.width) { op = (code == CAST_ZEXT) ? VKD3DSIH_UTOU : VKD3DSIH_ITOI; - assert(from->u.width == 1 || to->u.width == 64); + VKD3D_ASSERT(from->u.width == 1 || to->u.width == 64); is_valid = from_int && to_int; } break; @@ -6628,7 +6682,7 @@ static void sm6_parser_emit_cast(struct sm6_parser *sm6, const struct dxil_recor { *dst = *value; dst->type = type; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; return; } @@ -6739,7 +6793,7 @@ static void sm6_parser_emit_cmp2(struct sm6_parser *sm6, const struct dxil_recor * do not otherwise occur, so deleting these avoids the need for backend support. */ if (sm6_type_is_bool(type_a) && code == ICMP_NE && sm6_value_is_constant_zero(b)) { - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; *dst = *a; return; } @@ -7039,7 +7093,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record reg->idx_count = 2; dst->structure_stride = src->structure_stride; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7087,7 +7141,7 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (ptr->structure_stride) { - assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) @@ -7189,7 +7243,7 @@ static void sm6_parser_emit_phi(struct sm6_parser *sm6, const struct dxil_record incoming[j].block = sm6_function_get_block(function, 
record->operands[i + 1], sm6); } - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; qsort(incoming, phi->incoming_count, sizeof(*incoming), phi_incoming_compare); @@ -7224,7 +7278,7 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record code_block->terminator.type = TERMINATOR_RET; - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7270,7 +7324,7 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (ptr->structure_stride) { - assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + VKD3D_ASSERT(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) @@ -7326,7 +7380,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec if (!(src = sm6_parser_get_value_by_ref(sm6, record, type, &i)) || !sm6_value_validate_is_register(src, sm6)) return; - assert(i == 2); + VKD3D_ASSERT(i == 2); if (src->type != type) { @@ -7384,7 +7438,7 @@ static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_rec terminator->cases[i / 2u].value = sm6_value_get_constant_uint64(src); } - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; } static void sm6_parser_emit_vselect(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7636,7 +7690,7 @@ static void metadata_attachment_record_apply(const struct dxil_record *record, e "Ignoring a nested metadata attachment."); } - assert(record->operand_count & 1); + VKD3D_ASSERT(record->operand_count & 1); for (i = 1; i < record->operand_count; i += 2) { if (!(m = sm6_parser_find_metadata_kind(sm6, record->operands[i]))) @@ -7843,7 +7897,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const } ins = 
&code_block->instructions[code_block->instruction_count]; - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; dst = sm6_parser_get_current_value(sm6); fwd_type = dst->type; @@ -7922,7 +7976,6 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const if (sm6->p.failed) return VKD3D_ERROR; - assert(ins->handler_idx != VKD3DSIH_INVALID); if (record->attachment) metadata_attachment_record_apply(record->attachment, record->code, ins, dst, sm6); @@ -7933,9 +7986,7 @@ static enum vkd3d_result sm6_parser_function_init(struct sm6_parser *sm6, const code_block = (block_idx < function->block_count) ? function->blocks[block_idx] : NULL; } if (code_block) - code_block->instruction_count += ins->handler_idx != VKD3DSIH_NOP; - else - assert(ins->handler_idx == VKD3DSIH_NOP); + code_block->instruction_count += ins->opcode != VKD3DSIH_NOP; if (dst->type && fwd_type && dst->type != fwd_type) { @@ -8002,7 +8053,7 @@ static void sm6_block_emit_terminator(const struct sm6_block *block, struct sm6_ switch_case = &block->terminator.cases[i]; if (!(case_block = switch_case->block)) { - assert(sm6->p.failed); + VKD3D_ASSERT(sm6->p.failed); continue; } if (switch_case->is_default) @@ -8071,7 +8122,7 @@ static void sm6_block_emit_phi(const struct sm6_block *block, struct sm6_parser if (incoming_block) vsir_src_param_init_label(&src_params[index + 1], incoming_block->id); else - assert(sm6->p.failed); + VKD3D_ASSERT(sm6->p.failed); } dst_param_init(dst_param); @@ -8735,7 +8786,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc if (!m) { - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->opcode = is_uav ? 
VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ins->declaration.raw_resource.resource.reg.write_mask = 0; return &ins->declaration.raw_resource.resource; } @@ -8760,7 +8811,7 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc "A typed resource has no data type."); } - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; + ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_TYPED : VKD3DSIH_DCL; for (i = 0; i < VKD3D_VEC4_SIZE; ++i) ins->declaration.semantic.resource_data_type[i] = resource_values.data_type; ins->declaration.semantic.resource_type = resource_type; @@ -8770,14 +8821,14 @@ static struct vkd3d_shader_resource *sm6_parser_resources_load_common_info(struc } else if (kind == RESOURCE_KIND_RAWBUFFER) { - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; + ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_RAW : VKD3DSIH_DCL_RESOURCE_RAW; ins->declaration.raw_resource.resource.reg.write_mask = 0; return &ins->declaration.raw_resource.resource; } else if (kind == RESOURCE_KIND_STRUCTUREDBUFFER) { - ins->handler_idx = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; + ins->opcode = is_uav ? VKD3DSIH_DCL_UAV_STRUCTURED : VKD3DSIH_DCL_RESOURCE_STRUCTURED; ins->declaration.structured_resource.byte_stride = resource_values.byte_stride; ins->declaration.structured_resource.resource.reg.write_mask = 0; @@ -8858,7 +8909,7 @@ static enum vkd3d_result sm6_parser_resources_load_srv(struct sm6_parser *sm6, d->kind = kind; d->reg_type = VKD3DSPR_RESOURCE; d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_RESOURCE; - d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL) + d->resource_data_type = (ins->opcode == VKD3DSIH_DCL) ? 
ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; init_resource_declaration(resource, VKD3DSPR_RESOURCE, d->reg_data_type, d->id, &d->range); @@ -8932,7 +8983,7 @@ static enum vkd3d_result sm6_parser_resources_load_uav(struct sm6_parser *sm6, d->kind = values[0]; d->reg_type = VKD3DSPR_UAV; d->reg_data_type = (ins->resource_type == VKD3D_SHADER_RESOURCE_BUFFER) ? VKD3D_DATA_UINT : VKD3D_DATA_UAV; - d->resource_data_type = (ins->handler_idx == VKD3DSIH_DCL_UAV_TYPED) + d->resource_data_type = (ins->opcode == VKD3DSIH_DCL_UAV_TYPED) ? ins->declaration.semantic.resource_data_type[0] : VKD3D_DATA_UNUSED; init_resource_declaration(resource, VKD3DSPR_UAV, d->reg_data_type, d->id, &d->range); @@ -10155,12 +10206,13 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 return NULL; } -static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, const char *source_name, +static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) { size_t count, length, function_count, expected_function_count, byte_code_size = dxbc_desc->byte_code_size; + const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; struct shader_signature *patch_constant_signature, *output_signature, *input_signature; - const struct vkd3d_shader_location location = {.source_name = source_name}; uint32_t version_token, dxil_version, token_count, magic; const uint32_t *byte_code = dxbc_desc->byte_code; unsigned int chunk_offset, chunk_size; @@ -10251,9 +10303,9 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, struct vsir_pro /* Estimate instruction count to avoid reallocation in most shaders. 
*/ count = max(token_count, 400) - 400; - if (!vsir_program_init(program, &version, (count + (count >> 2)) / 2u + 10)) + if (!vsir_program_init(program, compile_info, &version, (count + (count >> 2)) / 2u + 10)) return VKD3D_ERROR_OUT_OF_MEMORY; - vkd3d_shader_parser_init(&sm6->p, program, message_context, source_name); + vkd3d_shader_parser_init(&sm6->p, program, message_context, compile_info->source_name); sm6->ptr = &sm6->start[1]; sm6->bitpos = 2; @@ -10489,7 +10541,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co uint32_t *byte_code = NULL; int ret; - ERR("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); + MESSAGE("Creating a DXIL parser. This is unsupported; you get to keep all the pieces if it breaks.\n"); dxbc_desc.is_dxil = true; if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, @@ -10514,7 +10566,7 @@ int dxil_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t co dxbc_desc.byte_code = byte_code; } - ret = sm6_parser_init(&sm6, program, compile_info->source_name, message_context, &dxbc_desc); + ret = sm6_parser_init(&sm6, program, compile_info, message_context, &dxbc_desc); free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(byte_code); diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index 57b4ac24212..a1d1fd6572f 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -56,6 +56,114 @@ static void string_storage_destroy(struct rb_entry *entry, void *context) vkd3d_free(string_entry); } +struct function_component +{ + const char *name; + bool lhs_has_index; + unsigned int lhs_index; +}; + +static const struct state_block_function_info +{ + const char *name; + unsigned int min_args, max_args; + const struct function_component components[3]; + unsigned int min_profile; +} +function_info[] = +{ + {"SetBlendState", 3, 3, { { "AB_BlendFactor" }, { 
"AB_SampleMask" }, { "BlendState" } }, 4 }, + {"SetDepthStencilState", 2, 2, { { "DS_StencilRef" }, { "DepthStencilState" } }, 4 }, + {"SetRasterizerState", 1, 1, { { "RasterizerState" } }, 4 }, + {"SetVertexShader", 1, 1, { { "VertexShader" } }, 4 }, + {"SetDomainShader", 1, 1, { { "DomainShader" } }, 5 }, + {"SetHullShader", 1, 1, { { "HullShader" } }, 5 }, + {"SetGeometryShader", 1, 1, { { "GeometryShader" } }, 4 }, + {"SetPixelShader", 1, 1, { { "PixelShader" } }, 4 }, + {"SetComputeShader", 1, 1, { { "ComputeShader" } }, 4 }, + {"OMSetRenderTargets", 2, 9, { {0} }, 4 }, +}; + +static const struct state_block_function_info *get_state_block_function_info(const char *name) +{ + for (unsigned int i = 0; i < ARRAY_SIZE(function_info); ++i) + { + if (!strcmp(name, function_info[i].name)) + return &function_info[i]; + } + return NULL; +} + +static void add_function_component(struct function_component **components, const char *name, + bool lhs_has_index, unsigned int lhs_index) +{ + struct function_component *comp = *components; + + comp->name = name; + comp->lhs_has_index = lhs_has_index; + comp->lhs_index = lhs_index; + + *components = *components + 1; +} + +static void get_state_block_function_components(const struct state_block_function_info *info, + struct function_component *components, unsigned int comp_count) +{ + unsigned int i; + + VKD3D_ASSERT(comp_count <= info->max_args); + + if (info->min_args == info->max_args) + { + const struct function_component *c = info->components; + for (i = 0; i < comp_count; ++i, ++c) + add_function_component(&components, c->name, c->lhs_has_index, c->lhs_index); + return; + } + + if (!strcmp(info->name, "OMSetRenderTargets")) + { + for (i = 0; i < comp_count - 2; ++i) + add_function_component(&components, "RenderTargetView", true, i + 1); + add_function_component(&components, "DepthStencilView", false, 0); + add_function_component(&components, "RenderTargetView", true, 0); + } +} + +bool hlsl_validate_state_block_entry(struct 
hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, + const struct vkd3d_shader_location *loc) +{ + if (entry->is_function_call) + { + const struct state_block_function_info *info = get_state_block_function_info(entry->name); + + if (!info) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, + "Invalid state block function '%s'.", entry->name); + return false; + } + if (entry->args_count < info->min_args || entry->args_count > info->max_args) + { + if (info->min_args == info->max_args) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, + "Invalid argument count for state block function '%s' (expected %u).", + entry->name, info->min_args); + } + else + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, + "Invalid argument count for state block function '%s' (expected from %u to %u).", + entry->name, info->min_args, info->max_args); + } + return false; + } + } + + return true; +} + struct fx_write_context; struct fx_write_context_ops @@ -63,6 +171,7 @@ struct fx_write_context_ops uint32_t (*write_string)(const char *string, struct fx_write_context *fx); void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); + void (*write_annotation)(struct hlsl_ir_var *var, struct fx_write_context *fx); bool are_child_effects_supported; }; @@ -94,6 +203,10 @@ struct fx_write_context uint32_t texture_count; uint32_t uav_count; uint32_t sampler_state_count; + uint32_t depth_stencil_state_count; + uint32_t rasterizer_state_count; + uint32_t blend_state_count; + uint32_t string_count; int status; bool child_effect; @@ -122,14 +235,46 @@ static uint32_t write_string(const char *string, struct fx_write_context *fx) static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { - if (var->state_block_count) - hlsl_fixme(fx->ctx, &var->loc, "Write state block assignments."); - fx->ops->write_pass(var, 
fx); } +static uint32_t write_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) +{ + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_var *v; + uint32_t count = 0; + + if (!scope) + return 0; + + LIST_FOR_EACH_ENTRY(v, &scope->vars, struct hlsl_ir_var, scope_entry) + { + if (!v->default_values) + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Annotation variable is missing default value."); + + fx->ops->write_annotation(v, fx); + ++count; + } + + return count; +} + +static void write_fx_4_annotations(struct hlsl_scope *scope, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t count_offset, count; + + count_offset = put_u32(buffer, 0); + count = write_annotations(scope, fx); + set_u32(buffer, count_offset, count); +} + static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); static const char * get_fx_4_type_name(const struct hlsl_type *type); +static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx); +static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, + uint32_t count_offset, struct fx_write_context *fx); static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { @@ -138,7 +283,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context unsigned int elements_count; const char *name; - assert(fx->ctx->profile->major_version >= 4); + VKD3D_ASSERT(fx->ctx->profile->major_version >= 4); if (type->class == HLSL_CLASS_ARRAY) { @@ -274,15 +419,14 @@ static uint32_t write_fx_4_string(const char *string, struct fx_write_context *f static void write_fx_4_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t name_offset; + uint32_t name_offset, count_offset; name_offset = write_string(var->name, fx); put_u32(buffer, name_offset); - put_u32(buffer, 0); /* 
Assignment count. */ - put_u32(buffer, 0); /* Annotation count. */ + count_offset = put_u32(buffer, 0); - /* TODO: annotations */ - /* TODO: assignments */ + write_fx_4_annotations(var->annotations, fx); + write_fx_4_state_block(var, 0, count_offset, fx); } static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) @@ -297,6 +441,12 @@ static void write_fx_2_pass(struct hlsl_ir_var *var, struct fx_write_context *fx /* TODO: annotations */ /* TODO: assignments */ + + if (var->state_block_count && var->state_blocks[0]->count) + hlsl_fixme(fx->ctx, &var->loc, "Write pass assignments."); + + /* For some reason every pass adds to the total shader object count. */ + fx->shader_count++; } static uint32_t get_fx_4_type_size(const struct hlsl_type *type) @@ -402,6 +552,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_UAV: return uav_type_names[type->sampler_dim]; + case HLSL_CLASS_DEPTH_STENCIL_STATE: + return "DepthStencilState"; + case HLSL_CLASS_DEPTH_STENCIL_VIEW: return "DepthStencilView"; @@ -414,6 +567,9 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) case HLSL_CLASS_PIXEL_SHADER: return "PixelShader"; + case HLSL_CLASS_STRING: + return "String"; + default: return type->name; } @@ -421,10 +577,20 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type) static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) { + struct field_offsets + { + uint32_t name; + uint32_t semantic; + uint32_t offset; + uint32_t type; + }; + uint32_t name_offset, offset, total_size, packed_size, stride, numeric_desc; struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - uint32_t name_offset, offset, size, stride, numeric_desc; + struct field_offsets *field_offsets = NULL; + struct hlsl_ctx *ctx = fx->ctx; uint32_t elements_count = 0; const char *name; + size_t i; /* Resolve arrays to element type and number of elements. 
*/ if (type->class == HLSL_CLASS_ARRAY) @@ -436,6 +602,22 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co name = get_fx_4_type_name(type); name_offset = write_string(name, fx); + if (type->class == HLSL_CLASS_STRUCT) + { + if (!(field_offsets = hlsl_calloc(ctx, type->e.record.field_count, sizeof(*field_offsets)))) + return 0; + + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + + field_offsets[i].name = write_string(field->name, fx); + field_offsets[i].semantic = write_string(field->semantic.raw_name, fx); + field_offsets[i].offset = field->reg_offset[HLSL_REGSET_NUMERIC]; + field_offsets[i].type = write_type(field->type, fx); + } + } + offset = put_u32_unaligned(buffer, name_offset); switch (type->class) @@ -446,13 +628,21 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, 1); break; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_TEXTURE: case HLSL_CLASS_UAV: case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_STRING: put_u32_unaligned(buffer, 2); break; @@ -464,43 +654,50 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: vkd3d_unreachable(); - case HLSL_CLASS_STRING: case HLSL_CLASS_VOID: FIXME("Writing type class %u is not implemented.\n", type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); return 0; } - size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + /* Structures can 
only contain numeric fields, this is validated during variable declaration. */ + total_size = stride = type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float); + packed_size = 0; + if (type->class == HLSL_CLASS_STRUCT || hlsl_is_numeric_type(type)) + packed_size = hlsl_type_component_count(type) * sizeof(float); if (elements_count) - size *= elements_count; + { + total_size *= elements_count; + packed_size *= elements_count; + } stride = align(stride, 4 * sizeof(float)); put_u32_unaligned(buffer, elements_count); - put_u32_unaligned(buffer, size); /* Total size. */ - put_u32_unaligned(buffer, stride); /* Stride. */ - put_u32_unaligned(buffer, size); + put_u32_unaligned(buffer, total_size); + put_u32_unaligned(buffer, stride); + put_u32_unaligned(buffer, packed_size); if (type->class == HLSL_CLASS_STRUCT) { - size_t i; - put_u32_unaligned(buffer, type->e.record.field_count); for (i = 0; i < type->e.record.field_count; ++i) { - const struct hlsl_struct_field *field = &type->e.record.fields[i]; - uint32_t semantic_offset, field_type_offset; + const struct field_offsets *field = &field_offsets[i]; - name_offset = write_string(field->name, fx); - semantic_offset = write_string(field->semantic.name, fx); - field_type_offset = write_type(field->type, fx); + put_u32_unaligned(buffer, field->name); + put_u32_unaligned(buffer, field->semantic); + put_u32_unaligned(buffer, field->offset); + put_u32_unaligned(buffer, field->type); + } - put_u32_unaligned(buffer, name_offset); - put_u32_unaligned(buffer, semantic_offset); - put_u32_unaligned(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]); - put_u32_unaligned(buffer, field_type_offset); + if (ctx->profile->major_version == 5) + { + put_u32_unaligned(buffer, 0); /* Base class type */ + put_u32_unaligned(buffer, 0); /* Interface count */ } } else if (type->class == HLSL_CLASS_TEXTURE) @@ -556,18 +753,46 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co { put_u32_unaligned(buffer, 6); } + else if 
(type->class == HLSL_CLASS_RASTERIZER_STATE) + { + put_u32_unaligned(buffer, 4); + } + else if (type->class == HLSL_CLASS_DEPTH_STENCIL_STATE) + { + put_u32_unaligned(buffer, 3); + } + else if (type->class == HLSL_CLASS_BLEND_STATE) + { + put_u32_unaligned(buffer, 2); + } + else if (type->class == HLSL_CLASS_STRING) + { + put_u32_unaligned(buffer, 1); + } else if (hlsl_is_numeric_type(type)) { numeric_desc = get_fx_4_numeric_type_description(type, fx); put_u32_unaligned(buffer, numeric_desc); } + else if (type->class == HLSL_CLASS_COMPUTE_SHADER) + { + put_u32_unaligned(buffer, 28); + } + else if (type->class == HLSL_CLASS_HULL_SHADER) + { + put_u32_unaligned(buffer, 29); + } + else if (type->class == HLSL_CLASS_DOMAIN_SHADER) + { + put_u32_unaligned(buffer, 30); + } else { FIXME("Type %u is not supported.\n", type->class); set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); - return 0; } + vkd3d_free(field_offsets); return offset; } @@ -581,8 +806,9 @@ static void write_fx_4_technique(struct hlsl_ir_var *var, struct fx_write_contex name_offset = write_string(var->name, fx); put_u32(buffer, name_offset); count_offset = put_u32(buffer, 0); - put_u32(buffer, 0); /* Annotation count. */ + write_fx_4_annotations(var->annotations, fx); + count = 0; LIST_FOR_EACH_ENTRY(pass, &var->scope->vars, struct hlsl_ir_var, scope_entry) { write_pass(pass, fx); @@ -617,7 +843,7 @@ static void write_group(struct hlsl_ir_var *var, struct fx_write_context *fx) put_u32(buffer, name_offset); count_offset = put_u32(buffer, 0); /* Technique count */ - put_u32(buffer, 0); /* Annotation count */ + write_fx_4_annotations(var ? var->annotations : NULL, fx); count = fx->technique_count; write_techniques(var ? 
var->scope : fx->ctx->globals, fx); @@ -668,6 +894,13 @@ static uint32_t write_fx_2_string(const char *string, struct fx_write_context *f return offset; } +static uint32_t get_fx_2_type_class(const struct hlsl_type *type) +{ + if (type->class == HLSL_CLASS_MATRIX) + return D3DXPC_MATRIX_ROWS; + return hlsl_sm1_class(type); +} + static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, struct fx_write_context *fx) { @@ -683,10 +916,10 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n } name_offset = write_string(name, fx); - semantic_offset = write_string(semantic->name, fx); + semantic_offset = semantic->raw_name ? write_string(semantic->raw_name, fx) : 0; offset = put_u32(buffer, hlsl_sm1_base_type(type)); - put_u32(buffer, hlsl_sm1_class(type)); + put_u32(buffer, get_fx_2_type_class(type)); put_u32(buffer, name_offset); put_u32(buffer, semantic_offset); put_u32(buffer, elements_count); @@ -705,6 +938,10 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n case HLSL_CLASS_STRUCT: put_u32(buffer, type->e.record.field_count); break; + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_PIXEL_SHADER: + fx->shader_count += elements_count; + break; default: ; } @@ -716,7 +953,7 @@ static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *n const struct hlsl_struct_field *field = &type->e.record.fields[i]; /* Validated in check_invalid_object_fields(). 
*/ - assert(hlsl_is_numeric_type(field->type)); + VKD3D_ASSERT(hlsl_is_numeric_type(field->type)); write_fx_2_parameter(field->type, field->name, &field->semantic, fx); } } @@ -794,6 +1031,9 @@ static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct f case HLSL_CLASS_MATRIX: case HLSL_CLASS_STRUCT: /* FIXME: write actual initial value */ + if (var->default_values) + hlsl_fixme(fx->ctx, &var->loc, "Write default values.\n"); + offset = put_u32(buffer, 0); for (uint32_t i = 1; i < size / sizeof(uint32_t); ++i) @@ -850,15 +1090,24 @@ static bool is_type_supported_fx_2(struct hlsl_ctx *ctx, const struct hlsl_type hlsl_fixme(ctx, loc, "Write fx 2.0 parameter class %#x.", type->class); return false; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_UAV: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_VOID: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: return false; case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: /* This cannot appear as an extern variable. 
*/ break; } @@ -910,7 +1159,7 @@ static const struct fx_write_context_ops fx_2_ops = static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { - uint32_t offset, size, technique_count, parameter_count, object_count; + uint32_t offset, size, technique_count, shader_count, parameter_count, object_count; struct vkd3d_bytecode_buffer buffer = { 0 }; struct vkd3d_bytecode_buffer *structured; struct fx_write_context fx; @@ -927,7 +1176,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) parameter_count = put_u32(structured, 0); /* Parameter count */ technique_count = put_u32(structured, 0); - put_u32(structured, 0); /* Unknown */ + shader_count = put_u32(structured, 0); object_count = put_u32(structured, 0); write_fx_2_parameters(&fx); @@ -936,6 +1185,7 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) write_techniques(ctx->globals, &fx); set_u32(structured, technique_count, fx.technique_count); + set_u32(structured, shader_count, fx.shader_count); put_u32(structured, 0); /* String count */ put_u32(structured, 0); /* Resource count */ @@ -972,9 +1222,93 @@ static const struct fx_write_context_ops fx_4_ops = .write_string = write_fx_4_string, .write_technique = write_fx_4_technique, .write_pass = write_fx_4_pass, + .write_annotation = write_fx_4_annotation, .are_child_effects_supported = true, }; +static uint32_t write_fx_4_default_value(struct hlsl_type *value_type, struct hlsl_default_value *value, + struct fx_write_context *fx) +{ + const struct hlsl_type *type = hlsl_get_multiarray_element_type(value_type); + uint32_t elements_count = hlsl_get_multiarray_size(value_type), i, j; + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + struct hlsl_ctx *ctx = fx->ctx; + uint32_t offset = buffer->size; + unsigned int comp_count; + + if (!value) + return 0; + + comp_count = hlsl_type_component_count(type); + + for (i = 0; i < elements_count; ++i) + { + switch (type->class) + { + 
case HLSL_CLASS_SCALAR: + case HLSL_CLASS_VECTOR: + case HLSL_CLASS_MATRIX: + { + switch (type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_INT: + case HLSL_TYPE_UINT: + case HLSL_TYPE_BOOL: + + for (j = 0; j < comp_count; ++j) + { + put_u32_unaligned(buffer, value->number.u); + value++; + } + break; + default: + hlsl_fixme(ctx, &ctx->location, "Writing default values for numeric type %u is not implemented.", + type->e.numeric.type); + } + + break; + } + case HLSL_CLASS_STRUCT: + { + struct hlsl_struct_field *fields = type->e.record.fields; + + for (j = 0; j < type->e.record.field_count; ++j) + { + write_fx_4_default_value(fields[i].type, value, fx); + value += hlsl_type_component_count(fields[i].type); + } + break; + } + default: + hlsl_fixme(ctx, &ctx->location, "Writing default values for class %u is not implemented.", type->class); + } + } + + return offset; +} + +static void write_fx_4_string_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; + const struct hlsl_default_value *value = var->default_values; + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + struct hlsl_ctx *ctx = fx->ctx; + uint32_t offset; + + if (!value) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "String objects have to be initialized."); + return; + } + + for (i = 0; i < elements_count; ++i, ++value) + { + offset = write_fx_4_string(value->string, fx); + put_u32(buffer, offset); + } +} + static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; @@ -984,22 +1318,20 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st { HAS_EXPLICIT_BIND_POINT = 0x4, }; - struct hlsl_ctx *ctx = fx->ctx; - /* Explicit bind point. 
*/ - if (var->reg_reservation.reg_type) + if (var->has_explicit_bind_point) flags |= HAS_EXPLICIT_BIND_POINT; type_offset = write_type(var->data_type, fx); name_offset = write_string(var->name, fx); - semantic_offset = write_string(var->semantic.name, fx); + semantic_offset = write_string(var->semantic.raw_name, fx); put_u32(buffer, name_offset); put_u32(buffer, type_offset); semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ - put_u32(buffer, var->buffer_offset); /* Offset in the constant buffer */ - value_offset = put_u32(buffer, 0); /* Default value offset */ + put_u32(buffer, var->buffer_offset * 4); /* Offset in the constant buffer, in bytes. */ + value_offset = put_u32(buffer, 0); put_u32(buffer, flags); /* Flags */ if (shared) @@ -1008,17 +1340,43 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, bool shared, st } else { - /* FIXME: write default value */ - set_u32(buffer, value_offset, 0); + uint32_t offset = write_fx_4_default_value(var->data_type, var->default_values, fx); + set_u32(buffer, value_offset, offset); - put_u32(buffer, 0); /* Annotations count */ - if (has_annotations(var)) - hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); + write_fx_4_annotations(var->annotations, fx); fx->numeric_variable_count++; } } +static void write_fx_4_annotation(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t name_offset, type_offset, offset; + struct hlsl_ctx *ctx = fx->ctx; + + name_offset = write_string(var->name, fx); + type_offset = write_type(var->data_type, fx); + + put_u32(buffer, name_offset); + put_u32(buffer, type_offset); + + if (hlsl_is_numeric_type(type)) + { + offset = write_fx_4_default_value(var->data_type, var->default_values, fx); + put_u32(buffer, offset); + } + else if (type->class == HLSL_CLASS_STRING) + { 
+ write_fx_4_string_initializer(var, fx); + } + else + { + hlsl_fixme(ctx, &var->loc, "Writing annotations for type class %u is not implemented.", type->class); + } +} + struct rhs_named_value { const char *name; @@ -1086,11 +1444,8 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl struct hlsl_ctx *ctx = fx->ctx; struct hlsl_ir_node *value = entry->args->node; - if (entry->lhs_has_index) - hlsl_fixme(ctx, &var->loc, "Unsupported assignment to array element."); - put_u32(buffer, entry->name_id); - put_u32(buffer, 0); /* TODO: destination index */ + put_u32(buffer, entry->lhs_index); type_offset = put_u32(buffer, 0); rhs_offset = put_u32(buffer, 0); @@ -1104,6 +1459,17 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl assignment_type = 1; break; } + case HLSL_IR_LOAD: + { + struct hlsl_ir_load *l = hlsl_ir_load(value); + + if (l->src.path_len) + hlsl_fixme(ctx, &var->loc, "Indexed access in RHS values is not implemented."); + + value_offset = write_fx_4_string(l->src.var->name, fx); + assignment_type = 2; + break; + } default: hlsl_fixme(ctx, &var->loc, "Unsupported assignment type for state %s.", entry->name); } @@ -1112,14 +1478,28 @@ static void write_fx_4_state_assignment(const struct hlsl_ir_var *var, struct hl set_u32(buffer, rhs_offset, value_offset); } -static bool state_block_contains_state(const char *name, unsigned int start, struct hlsl_state_block *block) +static bool state_block_contains_state(const struct hlsl_state_block_entry *entry, unsigned int start_index, + struct hlsl_state_block *block) { unsigned int i; - for (i = start; i < block->count; ++i) + for (i = start_index; i < block->count; ++i) { - if (!ascii_strcasecmp(block->entries[i]->name, name)) - return true; + const struct hlsl_state_block_entry *cur = block->entries[i]; + + if (cur->is_function_call) + continue; + + if (ascii_strcasecmp(cur->name, entry->name)) + continue; + + if (cur->lhs_has_index != 
entry->lhs_has_index) + continue; + + if (cur->lhs_has_index && cur->lhs_index != entry->lhs_index) + continue; + + return true; } return false; @@ -1131,6 +1511,24 @@ struct replace_state_context struct hlsl_ir_var *var; }; +static bool lower_null_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) +{ + struct hlsl_ir_node *c; + + if (instr->type != HLSL_IR_CONSTANT) + return false; + if (instr->data_type->class != HLSL_CLASS_NULL) + return false; + + if (!(c = hlsl_new_uint_constant(ctx, 0, &instr->loc))) + return false; + + list_add_before(&instr->entry, &c->entry); + hlsl_replace_node(instr, c); + + return true; +} + static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct replace_state_context *replace_context = context; @@ -1160,6 +1558,93 @@ static bool replace_state_block_constant(struct hlsl_ctx *ctx, struct hlsl_ir_no return true; } +enum state_property_component_type +{ + FX_BOOL, + FX_FLOAT, + FX_UINT, + FX_UINT8, + FX_DEPTHSTENCIL, + FX_RASTERIZER, + FX_DOMAINSHADER, + FX_HULLSHADER, + FX_COMPUTESHADER, + FX_TEXTURE, + FX_DEPTHSTENCILVIEW, + FX_RENDERTARGETVIEW, + FX_BLEND, + FX_VERTEXSHADER, + FX_PIXELSHADER, +}; + +static inline bool is_object_fx_type(enum state_property_component_type type) +{ + switch (type) + { + case FX_DEPTHSTENCIL: + case FX_RASTERIZER: + case FX_DOMAINSHADER: + case FX_HULLSHADER: + case FX_COMPUTESHADER: + case FX_TEXTURE: + case FX_RENDERTARGETVIEW: + case FX_DEPTHSTENCILVIEW: + case FX_BLEND: + case FX_VERTEXSHADER: + case FX_PIXELSHADER: + return true; + default: + return false; + } +} + +static inline enum hlsl_type_class hlsl_type_class_from_fx_type(enum state_property_component_type type) +{ + switch (type) + { + case FX_DEPTHSTENCIL: + return HLSL_CLASS_DEPTH_STENCIL_STATE; + case FX_RASTERIZER: + return HLSL_CLASS_RASTERIZER_STATE; + case FX_DOMAINSHADER: + return HLSL_CLASS_DOMAIN_SHADER; + case FX_HULLSHADER: + return 
HLSL_CLASS_HULL_SHADER; + case FX_COMPUTESHADER: + return HLSL_CLASS_COMPUTE_SHADER; + case FX_TEXTURE: + return HLSL_CLASS_TEXTURE; + case FX_RENDERTARGETVIEW: + return HLSL_CLASS_RENDER_TARGET_VIEW; + case FX_DEPTHSTENCILVIEW: + return HLSL_CLASS_DEPTH_STENCIL_VIEW; + case FX_BLEND: + return HLSL_CLASS_BLEND_STATE; + case FX_VERTEXSHADER: + return HLSL_CLASS_VERTEX_SHADER; + case FX_PIXELSHADER: + return HLSL_CLASS_PIXEL_SHADER; + default: + vkd3d_unreachable(); + } +} + +static inline enum hlsl_base_type hlsl_type_from_fx_type(enum state_property_component_type type) +{ + switch (type) + { + case FX_BOOL: + return HLSL_TYPE_BOOL; + case FX_FLOAT: + return HLSL_TYPE_FLOAT; + case FX_UINT: + case FX_UINT8: + return HLSL_TYPE_UINT; + default: + vkd3d_unreachable(); + } +} + static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl_state_block_entry *entry, struct fx_write_context *fx) { @@ -1209,44 +1694,233 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl { NULL } }; + static const struct rhs_named_value depth_write_mask_values[] = + { + { "ZERO", 0 }, + { "ALL", 1 }, + { NULL } + }; + + static const struct rhs_named_value comparison_values[] = + { + { "NEVER", 1 }, + { "LESS", 2 }, + { "EQUAL", 3 }, + { "LESS_EQUAL", 4 }, + { "GREATER", 5 }, + { "NOT_EQUAL", 6 }, + { "GREATER_EQUAL", 7 }, + { "ALWAYS", 8 }, + { NULL } + }; + + static const struct rhs_named_value stencil_op_values[] = + { + { "KEEP", 1 }, + { "ZERO", 2 }, + { "REPLACE", 3 }, + { "INCR_SAT", 4 }, + { "DECR_SAT", 5 }, + { "INVERT", 6 }, + { "INCR", 7 }, + { "DECR", 8 }, + { NULL } + }; + + static const struct rhs_named_value fill_values[] = + { + { "WIREFRAME", 2 }, + { "SOLID", 3 }, + { NULL } + }; + + static const struct rhs_named_value cull_values[] = + { + { "NONE", 1 }, + { "FRONT", 2 }, + { "BACK", 3 }, + { NULL } + }; + + static const struct rhs_named_value blend_values[] = + { + { "ZERO", 1 }, + { "ONE", 2 }, + { "SRC_COLOR", 3 }, + { 
"INV_SRC_COLOR", 4 }, + { "SRC_ALPHA", 5 }, + { "INV_SRC_ALPHA", 6 }, + { "DEST_ALPHA", 7 }, + { "INV_DEST_ALPHA", 8 }, + { "DEST_COLOR", 9 }, + { "INV_DEST_COLOR", 10 }, + { "SRC_ALPHA_SAT", 11 }, + { "BLEND_FACTOR", 14 }, + { "INV_BLEND_FACTOR", 15 }, + { "SRC1_COLOR", 16 }, + { "INV_SRC1_COLOR", 17 }, + { "SRC1_ALPHA", 18 }, + { "INV_SRC1_ALPHA", 19 }, + { NULL } + }; + + static const struct rhs_named_value blendop_values[] = + { + { "ADD", 1 }, + { "SUBTRACT", 2 }, + { "REV_SUBTRACT", 3 }, + { "MIN", 4 }, + { "MAX", 5 }, + { NULL } + }; + + static const struct rhs_named_value bool_values[] = + { + { "FALSE", 0 }, + { "TRUE", 1 }, + { NULL } + }; + + static const struct rhs_named_value null_values[] = + { + { "NULL", 0 }, + { NULL } + }; + static const struct state { const char *name; enum hlsl_type_class container; - enum hlsl_base_type type; + enum hlsl_type_class class; + enum state_property_component_type type; unsigned int dimx; + unsigned int array_size; uint32_t id; const struct rhs_named_value *values; } states[] = { - { "Filter", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 45, filter_values }, - { "AddressU", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 46, address_values }, - { "AddressV", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 47, address_values }, - { "AddressW", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 48, address_values }, - { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 49 }, - { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 50 }, - { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_TYPE_UINT, 1, 51, compare_func_values }, - { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 4, 52 }, - { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 53 }, - { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_TYPE_FLOAT, 1, 54 }, - /* TODO: "Texture" field */ + { "RasterizerState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RASTERIZER, 1, 1, 0 }, + { "DepthStencilState", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCIL, 1, 1, 1 }, + { "BlendState", HLSL_CLASS_PASS, 
HLSL_CLASS_SCALAR, FX_BLEND, 1, 1, 2 }, + { "RenderTargetView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_RENDERTARGETVIEW, 1, 8, 3 }, + { "DepthStencilView", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DEPTHSTENCILVIEW, 1, 1, 4 }, + + { "VertexShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_VERTEXSHADER, 1, 1, 6 }, + { "PixelShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_PIXELSHADER, 1, 1, 7 }, + { "DS_StencilRef", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 9 }, + { "AB_BlendFactor", HLSL_CLASS_PASS, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 10 }, + { "AB_SampleMask", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 11 }, + + { "FillMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 12, fill_values }, + { "CullMode", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 13, cull_values }, + { "FrontCounterClockwise", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 14, bool_values }, + { "DepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 15 }, + { "DepthBiasClamp", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 16 }, + { "SlopeScaledDepthBias", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 17 }, + { "DepthClipEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 18, bool_values }, + { "ScissorEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 19, bool_values }, + { "MultisampleEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 20, bool_values }, + { "AntializedLineEnable", HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 21, bool_values }, + + { "DepthEnable", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 22, bool_values }, + { "DepthWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 23, depth_write_mask_values }, + { "DepthFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 24, comparison_values }, + { "StencilEnable", 
HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 25, bool_values }, + { "StencilReadMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 26 }, + { "StencilWriteMask", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 1, 27 }, + { "FrontFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 28, stencil_op_values }, + { "FrontFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 29, stencil_op_values }, + { "FrontFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 30, stencil_op_values }, + { "FrontFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 31, comparison_values }, + { "BackFaceStencilFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 32, stencil_op_values }, + { "BackFaceStencilDepthFail", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 33, stencil_op_values }, + { "BackFaceStencilPass", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 34, stencil_op_values }, + { "BackFaceStencilFunc", HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 35, comparison_values }, + + { "Filter", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 45, filter_values }, + { "AddressU", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 46, address_values }, + { "AddressV", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 47, address_values }, + { "AddressW", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 48, address_values }, + { "MipLODBias", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 49 }, + { "MaxAnisotropy", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 50 }, + { "ComparisonFunc", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 51, compare_func_values }, + { "BorderColor", HLSL_CLASS_SAMPLER, HLSL_CLASS_VECTOR, FX_FLOAT, 4, 1, 52 }, + { "MinLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, 
FX_FLOAT, 1, 1, 53 }, + { "MaxLOD", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_FLOAT, 1, 1, 54 }, + { "Texture", HLSL_CLASS_SAMPLER, HLSL_CLASS_SCALAR, FX_TEXTURE, 1, 1, 55, null_values }, + + { "HullShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_HULLSHADER, 1, 1, 56 }, + { "DomainShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_DOMAINSHADER, 1, 1, 57 }, + { "ComputeShader", HLSL_CLASS_PASS, HLSL_CLASS_SCALAR, FX_COMPUTESHADER, 1, 1, 58 }, }; + + static const struct state fx_4_blend_states[] = + { + { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, + { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, + { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 38, blend_values }, + { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 39, blend_values }, + { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 40, blendop_values }, + { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 41, blend_values }, + { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 42, blend_values }, + { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 1, 43, blendop_values }, + { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, + }; + + static const struct state fx_5_blend_states[] = + { + { "AlphaToCoverageEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 1, 36, bool_values }, + { "BlendEnable", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_BOOL, 1, 8, 37, bool_values }, + { "SrcBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 38, blend_values }, + { "DestBlend", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 39, blend_values }, + { "BlendOp", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 40, blendop_values }, + { "SrcBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, 
FX_UINT, 1, 8, 41, blend_values }, + { "DestBlendAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 42, blend_values }, + { "BlendOpAlpha", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT, 1, 8, 43, blendop_values }, + { "RenderTargetWriteMask", HLSL_CLASS_BLEND_STATE, HLSL_CLASS_SCALAR, FX_UINT8, 1, 8, 44 }, + }; + + struct state_table + { + const struct state *ptr; + unsigned int count; + } table; + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); struct replace_state_context replace_context; + struct hlsl_type *state_type = NULL; struct hlsl_ir_node *node, *cast; const struct state *state = NULL; struct hlsl_ctx *ctx = fx->ctx; - struct hlsl_type *state_type; + enum hlsl_base_type base_type; unsigned int i; - bool progress; - for (i = 0; i < ARRAY_SIZE(states); ++i) + if (type->class == HLSL_CLASS_BLEND_STATE) { - if (type->class == states[i].container - && !ascii_strcasecmp(entry->name, states[i].name)) + if (ctx->profile->major_version == 4) + { + table.ptr = fx_4_blend_states; + table.count = ARRAY_SIZE(fx_4_blend_states); + } + else { - state = &states[i]; + table.ptr = fx_5_blend_states; + table.count = ARRAY_SIZE(fx_5_blend_states); + } + } + else + { + table.ptr = states; + table.count = ARRAY_SIZE(states); + } + + for (i = 0; i < table.count; ++i) + { + if (type->class == table.ptr[i].container + && !ascii_strcasecmp(entry->name, table.ptr[i].name)) + { + state = &table.ptr[i]; break; } } @@ -1264,69 +1938,327 @@ static void resolve_fx_4_state_block_values(struct hlsl_ir_var *var, struct hlsl return; } + if (entry->lhs_has_index && state->array_size == 1) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Can't use array-style access for non-array state %s.", + entry->name); + return; + } + + if (!entry->lhs_has_index && state->array_size > 1) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Expected array index for array state %s.", + entry->name); + 
return; + } + + if (entry->lhs_has_index && (state->array_size <= entry->lhs_index)) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Invalid element index %u for the state %s[%u].", + entry->lhs_index, state->name, state->array_size); + return; + } + entry->name_id = state->id; replace_context.values = state->values; replace_context.var = var; - /* Turned named constants to actual constants. */ + /* Turn named constants to actual constants. */ + hlsl_transform_ir(ctx, lower_null_constant, entry->instrs, NULL); hlsl_transform_ir(ctx, replace_state_block_constant, entry->instrs, &replace_context); + hlsl_run_const_passes(ctx, entry->instrs); - if (state->dimx) - state_type = hlsl_get_vector_type(ctx, state->type, state->dimx); - else - state_type = hlsl_get_scalar_type(ctx, state->type); + /* Now cast and run folding again. */ + + if (is_object_fx_type(state->type)) + { + node = entry->args->node; + + switch (node->type) + { + case HLSL_IR_LOAD: + { + struct hlsl_ir_load *load = hlsl_ir_load(node); + + if (load->src.path_len) + hlsl_fixme(ctx, &ctx->location, "Arrays are not supported for RHS."); + + if (load->src.var->data_type->class != hlsl_type_class_from_fx_type(state->type)) + { + hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Type mismatch for the %s state value", + entry->name); + } + + break; + } + case HLSL_IR_CONSTANT: + { + struct hlsl_ir_constant *c = hlsl_ir_constant(node); + struct hlsl_type *data_type = c->node.data_type; + + if (data_type->class == HLSL_CLASS_SCALAR && data_type->e.numeric.type == HLSL_TYPE_UINT) + { + if (c->value.u[0].u != 0) + hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Only 0 integer constants are allowed for object-typed fields."); + } + else + { + hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Unexpected constant used for object-typed field."); + } + + break; + } + default: + hlsl_fixme(ctx, &ctx->location, "Unhandled 
node type for object-typed field."); + } - /* Cast to expected property type. */ - node = entry->args->node; - if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) return; - list_add_after(&node->entry, &cast->entry); + } - hlsl_src_remove(entry->args); - hlsl_src_from_node(entry->args, cast); + base_type = hlsl_type_from_fx_type(state->type); + switch (state->class) + { + case HLSL_CLASS_VECTOR: + state_type = hlsl_get_vector_type(ctx, base_type, state->dimx); + break; + case HLSL_CLASS_SCALAR: + state_type = hlsl_get_scalar_type(ctx, base_type); + break; + case HLSL_CLASS_TEXTURE: + hlsl_fixme(ctx, &ctx->location, "Object type fields are not supported."); + break; + default: + ; + } - do + if (state_type) { - progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, entry->instrs, NULL); - progress |= hlsl_copy_propagation_execute(ctx, entry->instrs); - } while (progress); + node = entry->args->node; + if (!(cast = hlsl_new_cast(ctx, node, state_type, &var->loc))) + return; + list_add_after(&node->entry, &cast->entry); + + /* FX_UINT8 values are using 32-bits in the binary. Mask higher 24 bits for those. 
*/ + if (state->type == FX_UINT8) + { + struct hlsl_ir_node *mask; + + if (!(mask = hlsl_new_uint_constant(ctx, 0xff, &var->loc))) + return; + list_add_after(&cast->entry, &mask->entry); + + if (!(cast = hlsl_new_binary_expr(ctx, HLSL_OP2_BIT_AND, cast, mask))) + return; + list_add_after(&mask->entry, &cast->entry); + } + + hlsl_src_remove(entry->args); + hlsl_src_from_node(entry->args, cast); + + hlsl_run_const_passes(ctx, entry->instrs); + } } -static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) +static bool decompose_fx_4_state_add_entries(struct hlsl_state_block *block, unsigned int entry_index, + unsigned int count) { - uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i, j; - struct vkd3d_bytecode_buffer *buffer = &fx->structured; - uint32_t count_offset, count; + if (!vkd3d_array_reserve((void **)&block->entries, &block->capacity, block->count + count, sizeof(*block->entries))) + return false; - for (i = 0; i < elements_count; ++i) + if (entry_index != block->count - 1) { - struct hlsl_state_block *block; + memmove(&block->entries[entry_index + count + 1], &block->entries[entry_index + 1], + (block->count - entry_index - 1) * sizeof(*block->entries)); + } + block->count += count; - count_offset = put_u32(buffer, 0); + return true; +} + +static unsigned int decompose_fx_4_state_function_call(struct hlsl_ir_var *var, struct hlsl_state_block *block, + unsigned int entry_index, struct fx_write_context *fx) +{ + struct hlsl_state_block_entry *entry = block->entries[entry_index]; + const struct state_block_function_info *info; + struct function_component components[9]; + struct hlsl_ctx *ctx = fx->ctx; + unsigned int i; + + if (!entry->is_function_call) + return 1; + + if (!(info = get_state_block_function_info(entry->name))) + return 1; + + if (info->min_profile > ctx->profile->major_version) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY, + "State %s is not 
supported for this profile.", entry->name); + return 1; + } + + /* For single argument case simply replace the name. */ + if (info->min_args == info->max_args && info->min_args == 1) + { + vkd3d_free(entry->name); + entry->name = hlsl_strdup(ctx, info->components[0].name); + return 1; + } + + if (!decompose_fx_4_state_add_entries(block, entry_index, entry->args_count - 1)) + return 1; - count = 0; - if (var->state_blocks) + get_state_block_function_components(info, components, entry->args_count); + + for (i = 0; i < entry->args_count; ++i) + { + const struct function_component *comp = &components[i]; + unsigned int arg_index = (i + 1) % entry->args_count; + block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, comp->name, + comp->lhs_has_index, comp->lhs_index, arg_index); + } + hlsl_free_state_block_entry(entry); + + return entry->args_count; +} + +/* For some states assignment sets all of the elements. This behaviour is limited to certain states of BlendState + object, and only when fx_5_0 profile is used. 
*/ +static unsigned int decompose_fx_4_state_block_expand_array(struct hlsl_ir_var *var, struct hlsl_state_block *block, + unsigned int entry_index, struct fx_write_context *fx) +{ + static const char *states[] = { "SrcBlend", "DestBlend", "BlendOp", "SrcBlendAlpha", "DestBlendAlpha", "BlendOpAlpha" }; + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + struct hlsl_state_block_entry *entry = block->entries[entry_index]; + static const unsigned int array_size = 8; + struct hlsl_ctx *ctx = fx->ctx; + bool found = false; + unsigned int i; + + if (type->class != HLSL_CLASS_BLEND_STATE) + return 1; + if (ctx->profile->major_version != 5) + return 1; + if (entry->lhs_has_index) + return 1; + + for (i = 0; i < ARRAY_SIZE(states); ++i) + { + if (!ascii_strcasecmp(entry->name, states[i])) { - block = var->state_blocks[i]; + found = true; + break; + } + } - for (j = 0; j < block->count; ++j) - { - struct hlsl_state_block_entry *entry = block->entries[j]; + if (!found) + return 1; + + if (!decompose_fx_4_state_add_entries(block, entry_index, array_size - 1)) + return 1; + + block->entries[entry_index]->lhs_has_index = true; + for (i = 1; i < array_size; ++i) + { + block->entries[entry_index + i] = clone_stateblock_entry(ctx, entry, + entry->name, true, i, 0); + } - /* Skip if property is reassigned later. This will use the last assignment. */ - if (state_block_contains_state(entry->name, j + 1, block)) - continue; + return array_size; +} - /* Resolve special constant names and property names. 
*/ - resolve_fx_4_state_block_values(var, entry, fx); +static unsigned int decompose_fx_4_state_block(struct hlsl_ir_var *var, struct hlsl_state_block *block, + unsigned int entry_index, struct fx_write_context *fx) +{ + struct hlsl_state_block_entry *entry = block->entries[entry_index]; - write_fx_4_state_assignment(var, entry, fx); - ++count; - } + if (entry->is_function_call) + return decompose_fx_4_state_function_call(var, block, entry_index, fx); + + return decompose_fx_4_state_block_expand_array(var, block, entry_index, fx); +} + +static void write_fx_4_state_block(struct hlsl_ir_var *var, unsigned int block_index, + uint32_t count_offset, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + struct hlsl_state_block *block; + uint32_t i, count = 0; + + if (var->state_blocks) + { + block = var->state_blocks[block_index]; + + for (i = 0; i < block->count;) + { + i += decompose_fx_4_state_block(var, block, i, fx); } - set_u32(buffer, count_offset, count); + for (i = 0; i < block->count; ++i) + { + struct hlsl_state_block_entry *entry = block->entries[i]; + + /* Skip if property is reassigned later. This will use the last assignment. */ + if (state_block_contains_state(entry, i + 1, block)) + continue; + + /* Resolve special constant names and property names. 
*/ + resolve_fx_4_state_block_values(var, entry, fx); + + write_fx_4_state_assignment(var, entry, fx); + ++count; + } + } + + set_u32(buffer, count_offset, count); +} + +static void write_fx_4_state_object_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type), i; + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t count_offset; + + for (i = 0; i < elements_count; ++i) + { + count_offset = put_u32(buffer, 0); + + write_fx_4_state_block(var, i, count_offset, fx); + } +} + +static void write_fx_4_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); + unsigned int i; + + /* FIXME: write shader blobs, once parser support works. */ + for (i = 0; i < elements_count; ++i) + put_u32(buffer, 0); +} + +static void write_fx_5_shader_initializer(struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); + unsigned int i; + + /* FIXME: write shader blobs, once parser support works. 
*/ + for (i = 0; i < elements_count; ++i) + { + put_u32(buffer, 0); /* Blob offset */ + put_u32(buffer, 0); /* SODecl[0] offset */ + put_u32(buffer, 0); /* SODecl[1] offset */ + put_u32(buffer, 0); /* SODecl[2] offset */ + put_u32(buffer, 0); /* SODecl[3] offset */ + put_u32(buffer, 0); /* SODecl count */ + put_u32(buffer, 0); /* Rasterizer stream */ + put_u32(buffer, 0); /* Interface bindings count */ + put_u32(buffer, 0); /* Interface initializer offset */ } } @@ -1336,7 +2268,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); struct vkd3d_bytecode_buffer *buffer = &fx->structured; uint32_t semantic_offset, bind_point = ~0u; - uint32_t name_offset, type_offset, i; + uint32_t name_offset, type_offset; struct hlsl_ctx *ctx = fx->ctx; if (var->reg_reservation.reg_type) @@ -1344,7 +2276,7 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ type_offset = write_type(var->data_type, fx); name_offset = write_string(var->name, fx); - semantic_offset = write_string(var->semantic.name, fx); + semantic_offset = write_string(var->semantic.raw_name, fx); put_u32(buffer, name_offset); put_u32(buffer, type_offset); @@ -1373,9 +2305,14 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_VERTEX_SHADER: - /* FIXME: write shader blobs, once parser support works. 
*/ - for (i = 0; i < elements_count; ++i) - put_u32(buffer, 0); + write_fx_4_shader_initializer(var, fx); + fx->shader_count += elements_count; + break; + + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + write_fx_5_shader_initializer(var, fx); fx->shader_count += elements_count; break; @@ -1383,19 +2320,37 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ fx->dsv_count += elements_count; break; + case HLSL_CLASS_DEPTH_STENCIL_STATE: + write_fx_4_state_object_initializer(var, fx); + fx->depth_stencil_state_count += elements_count; + break; + case HLSL_CLASS_SAMPLER: write_fx_4_state_object_initializer(var, fx); fx->sampler_state_count += elements_count; break; + case HLSL_CLASS_RASTERIZER_STATE: + write_fx_4_state_object_initializer(var, fx); + fx->rasterizer_state_count += elements_count; + break; + + case HLSL_CLASS_BLEND_STATE: + write_fx_4_state_object_initializer(var, fx); + fx->blend_state_count += elements_count; + break; + + case HLSL_CLASS_STRING: + write_fx_4_string_initializer(var, fx); + fx->string_count += elements_count; + break; + default: - hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", - type->e.numeric.type); + hlsl_fixme(ctx, &ctx->location, "Writing initializer for object class %u is not implemented.", + type->class); } - put_u32(buffer, 0); /* Annotations count */ - if (has_annotations(var)) - hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); + write_fx_4_annotations(var->annotations, fx); ++fx->object_variable_count; } @@ -1438,9 +2393,7 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx } else { - put_u32(buffer, 0); /* Annotations count */ - if (b->annotations) - hlsl_fixme(ctx, &b->loc, "Writing annotations for buffers is not implemented."); + write_fx_4_annotations(b->annotations, fx); ++fx->buffer_count; } @@ -1464,6 +2417,9 @@ 
static void write_buffers(struct fx_write_context *fx, bool shared) { struct hlsl_buffer *buffer; + if (shared && !fx->child_effect) + return; + LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) { if (!buffer->size && !fx->include_empty_buffers) @@ -1483,11 +2439,22 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc switch (type->class) { + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_STRING: + return true; + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + if (ctx->profile->major_version < 5) + return false; return true; case HLSL_CLASS_UAV: if (ctx->profile->major_version < 5) @@ -1495,8 +2462,6 @@ static bool is_supported_object_variable(const struct hlsl_ctx *ctx, const struc if (type->e.resource.rasteriser_ordered) return false; return true; - case HLSL_CLASS_VERTEX_SHADER: - return true; default: return false; @@ -1549,11 +2514,11 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, fx.shared_object_count); put_u32(&buffer, fx.technique_count); size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ + put_u32(&buffer, fx.string_count); put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ - put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. 
*/ + put_u32(&buffer, fx.depth_stencil_state_count); + put_u32(&buffer, fx.blend_state_count); + put_u32(&buffer, fx.rasterizer_state_count); put_u32(&buffer, fx.sampler_state_count); put_u32(&buffer, fx.rtv_count); put_u32(&buffer, fx.dsv_count); @@ -1607,11 +2572,11 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, fx.shared_object_count); put_u32(&buffer, fx.technique_count); size_offset = put_u32(&buffer, 0); /* Unstructured size. */ - put_u32(&buffer, 0); /* String count. */ + put_u32(&buffer, fx.string_count); put_u32(&buffer, fx.texture_count); - put_u32(&buffer, 0); /* Depth stencil state count. */ - put_u32(&buffer, 0); /* Blend state count. */ - put_u32(&buffer, 0); /* Rasterizer state count. */ + put_u32(&buffer, fx.depth_stencil_state_count); + put_u32(&buffer, fx.blend_state_count); + put_u32(&buffer, fx.rasterizer_state_count); put_u32(&buffer, fx.sampler_state_count); put_u32(&buffer, fx.rtv_count); put_u32(&buffer, fx.dsv_count); diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index 3e482a5fc70..d1f02ab568b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -18,10 +18,23 @@ #include "vkd3d_shader_private.h" +struct glsl_src +{ + struct vkd3d_string_buffer *str; +}; + +struct glsl_dst +{ + const struct vkd3d_shader_dst_param *vsir; + struct vkd3d_string_buffer *register_name; + struct vkd3d_string_buffer *mask; +}; + struct vkd3d_glsl_generator { struct vsir_program *program; - struct vkd3d_string_buffer buffer; + struct vkd3d_string_buffer_cache string_buffers; + struct vkd3d_string_buffer *buffer; struct vkd3d_shader_location location; struct vkd3d_shader_message_context *message_context; unsigned int indent; @@ -45,18 +58,149 @@ static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigne vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); } +static void 
shader_glsl_print_register_name(struct vkd3d_string_buffer *buffer, + struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_register *reg) +{ + switch (reg->type) + { + case VKD3DSPR_TEMP: + vkd3d_string_buffer_printf(buffer, "r[%u]", reg->idx[0].offset); + break; + + default: + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled register type %#x.", reg->type); + vkd3d_string_buffer_printf(buffer, "", reg->type); + break; + } +} + +static void shader_glsl_print_swizzle(struct vkd3d_string_buffer *buffer, uint32_t swizzle, uint32_t mask) +{ + const char swizzle_chars[] = "xyzw"; + unsigned int i; + + vkd3d_string_buffer_printf(buffer, "."); + for (i = 0; i < VKD3D_VEC4_SIZE; ++i) + { + if (mask & (VKD3DSP_WRITEMASK_0 << i)) + vkd3d_string_buffer_printf(buffer, "%c", swizzle_chars[vsir_swizzle_get_component(swizzle, i)]); + } +} + +static void shader_glsl_print_write_mask(struct vkd3d_string_buffer *buffer, uint32_t write_mask) +{ + vkd3d_string_buffer_printf(buffer, "."); + if (write_mask & VKD3DSP_WRITEMASK_0) + vkd3d_string_buffer_printf(buffer, "x"); + if (write_mask & VKD3DSP_WRITEMASK_1) + vkd3d_string_buffer_printf(buffer, "y"); + if (write_mask & VKD3DSP_WRITEMASK_2) + vkd3d_string_buffer_printf(buffer, "z"); + if (write_mask & VKD3DSP_WRITEMASK_3) + vkd3d_string_buffer_printf(buffer, "w"); +} + +static void glsl_src_cleanup(struct glsl_src *src, struct vkd3d_string_buffer_cache *cache) +{ + vkd3d_string_buffer_release(cache, src->str); +} + +static void glsl_src_init(struct glsl_src *glsl_src, struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_src_param *vsir_src, uint32_t mask) +{ + const struct vkd3d_shader_register *reg = &vsir_src->reg; + + glsl_src->str = vkd3d_string_buffer_get(&gen->string_buffers); + + if (reg->non_uniform) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); + if (vsir_src->modifiers) + 
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled source modifier(s) %#x.", vsir_src->modifiers); + + shader_glsl_print_register_name(glsl_src->str, gen, reg); + if (reg->dimension == VSIR_DIMENSION_VEC4) + shader_glsl_print_swizzle(glsl_src->str, vsir_src->swizzle, mask); +} + +static void glsl_dst_cleanup(struct glsl_dst *dst, struct vkd3d_string_buffer_cache *cache) +{ + vkd3d_string_buffer_release(cache, dst->mask); + vkd3d_string_buffer_release(cache, dst->register_name); +} + +static uint32_t glsl_dst_init(struct glsl_dst *glsl_dst, struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_dst_param *vsir_dst) +{ + uint32_t write_mask = vsir_dst->write_mask; + + if (ins->flags & VKD3DSI_PRECISE_XYZW) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled 'precise' modifier."); + if (vsir_dst->reg.non_uniform) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled 'non-uniform' modifier."); + + glsl_dst->vsir = vsir_dst; + glsl_dst->register_name = vkd3d_string_buffer_get(&gen->string_buffers); + glsl_dst->mask = vkd3d_string_buffer_get(&gen->string_buffers); + + shader_glsl_print_register_name(glsl_dst->register_name, gen, &vsir_dst->reg); + shader_glsl_print_write_mask(glsl_dst->mask, write_mask); + + return write_mask; +} + +static void VKD3D_PRINTF_FUNC(3, 4) shader_glsl_print_assignment( + struct vkd3d_glsl_generator *gen, struct glsl_dst *dst, const char *format, ...) 
+{ + va_list args; + + if (dst->vsir->shift) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled destination shift %#x.", dst->vsir->shift); + if (dst->vsir->modifiers) + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled destination modifier(s) %#x.", dst->vsir->modifiers); + + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "%s%s = ", dst->register_name->buffer, dst->mask->buffer); + + va_start(args, format); + vkd3d_string_buffer_vprintf(gen->buffer, format, args); + va_end(args); + + vkd3d_string_buffer_printf(gen->buffer, ";\n"); +} + static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { - shader_glsl_print_indent(&gen->buffer, gen->indent); - vkd3d_string_buffer_printf(&gen->buffer, "/* */\n", ins->handler_idx); + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "/* */\n", ins->opcode); vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); + "Internal compiler error: Unhandled instruction %#x.", ins->opcode); } -static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, - const struct vkd3d_shader_instruction *ins) +static void shader_glsl_mov(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + struct glsl_src src; + struct glsl_dst dst; + uint32_t mask; + + mask = glsl_dst_init(&dst, gen, ins, &ins->dst[0]); + glsl_src_init(&src, gen, &ins->src[0], mask); + + shader_glsl_print_assignment(gen, &dst, "%s", src.str->buffer); + + glsl_src_cleanup(&src, &gen->string_buffers); + glsl_dst_cleanup(&dst, &gen->string_buffers); +} + +static void shader_glsl_ret(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) { - const struct vkd3d_shader_version *version = 
&generator->program->shader_version; + const struct vkd3d_shader_version *version = &gen->program->shader_version; /* * TODO: Implement in_subroutine @@ -64,45 +208,59 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, */ if (version->major >= 4) { - shader_glsl_print_indent(&generator->buffer, generator->indent); - vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); + shader_glsl_print_indent(gen->buffer, gen->indent); + vkd3d_string_buffer_printf(gen->buffer, "return;\n"); } } -static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, - const struct vkd3d_shader_instruction *instruction) +static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen, + const struct vkd3d_shader_instruction *ins) { - generator->location = instruction->location; + gen->location = ins->location; - switch (instruction->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_OUTPUT: case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_NOP: break; + case VKD3DSIH_MOV: + shader_glsl_mov(gen, ins); + break; case VKD3DSIH_RET: - shader_glsl_ret(generator, instruction); + shader_glsl_ret(gen, ins); break; default: - shader_glsl_unhandled(generator, instruction); + shader_glsl_unhandled(gen, ins); break; } } +static void shader_glsl_generate_declarations(struct vkd3d_glsl_generator *gen) +{ + const struct vsir_program *program = gen->program; + struct vkd3d_string_buffer *buffer = gen->buffer; + + if (program->temp_count) + vkd3d_string_buffer_printf(buffer, "vec4 r[%u];\n\n", program->temp_count); +} + static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struct vkd3d_shader_code *out) { const struct vkd3d_shader_instruction_array *instructions = &gen->program->instructions; - struct vkd3d_string_buffer *buffer = &gen->buffer; + struct vkd3d_string_buffer *buffer = gen->buffer; unsigned int i; void *code; - ERR("Generating a GLSL shader. 
This is unsupported; you get to keep all the pieces if it breaks.\n"); + MESSAGE("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); vkd3d_string_buffer_printf(buffer, "#version 440\n\n"); vkd3d_string_buffer_printf(buffer, "/* Generated by %s. */\n\n", vkd3d_shader_get_version(NULL, NULL)); + shader_glsl_generate_declarations(gen); + vkd3d_string_buffer_printf(buffer, "void main()\n{\n"); ++gen->indent; @@ -132,7 +290,8 @@ static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *gen, struc static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) { - vkd3d_string_buffer_cleanup(&gen->buffer); + vkd3d_string_buffer_release(&gen->string_buffers, gen->buffer); + vkd3d_string_buffer_cache_cleanup(&gen->string_buffers); } static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, @@ -140,7 +299,8 @@ static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, { memset(gen, 0, sizeof(*gen)); gen->program = program; - vkd3d_string_buffer_init(&gen->buffer); + vkd3d_string_buffer_cache_init(&gen->string_buffers); + gen->buffer = vkd3d_string_buffer_get(&gen->string_buffers); gen->message_context = message_context; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 99214fba6de..bd5baacd83d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -134,7 +134,7 @@ struct hlsl_ir_var *hlsl_get_var(struct hlsl_scope *scope, const char *name) return hlsl_get_var(scope->upper, name); } -static void free_state_block_entry(struct hlsl_state_block_entry *entry) +void hlsl_free_state_block_entry(struct hlsl_state_block_entry *entry) { unsigned int i; @@ -151,9 +151,9 @@ void hlsl_free_state_block(struct hlsl_state_block *state_block) { unsigned int k; - assert(state_block); + VKD3D_ASSERT(state_block); for (k = 0; k < state_block->count; ++k) - free_state_block_entry(state_block->entries[k]); + 
hlsl_free_state_block_entry(state_block->entries[k]); vkd3d_free(state_block->entries); vkd3d_free(state_block); } @@ -167,6 +167,15 @@ void hlsl_free_var(struct hlsl_ir_var *decl) for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k) vkd3d_free((void *)decl->objects_usage[k]); + if (decl->default_values) + { + unsigned int component_count = hlsl_type_component_count(decl->data_type); + + for (k = 0; k < component_count; ++k) + vkd3d_free((void *)decl->default_values[k].string); + vkd3d_free(decl->default_values); + } + for (i = 0; i < decl->state_block_count; ++i) hlsl_free_state_block(decl->state_blocks[i]); vkd3d_free(decl->state_blocks); @@ -367,15 +376,24 @@ static void hlsl_type_calculate_reg_size(struct hlsl_ctx *ctx, struct hlsl_type type->reg_size[HLSL_REGSET_UAVS] = 1; break; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_STRING: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VERTEX_SHADER: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: break; } } @@ -435,21 +453,30 @@ static bool type_is_single_component(const struct hlsl_type *type) { switch (type->class) { + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: case HLSL_CLASS_SCALAR: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_TEXTURE: case HLSL_CLASS_UAV: case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: return true; 
case HLSL_CLASS_VECTOR: case HLSL_CLASS_MATRIX: case HLSL_CLASS_STRUCT: case HLSL_CLASS_ARRAY: + case HLSL_CLASS_CONSTANT_BUFFER: return false; case HLSL_CLASS_EFFECT_GROUP: @@ -474,13 +501,13 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, struct hlsl_type *type = *type_ptr; unsigned int index = *index_ptr; - assert(!type_is_single_component(type)); - assert(index < hlsl_type_component_count(type)); + VKD3D_ASSERT(!type_is_single_component(type)); + VKD3D_ASSERT(index < hlsl_type_component_count(type)); switch (type->class) { case HLSL_CLASS_VECTOR: - assert(index < type->dimx); + VKD3D_ASSERT(index < type->dimx); *type_ptr = hlsl_get_scalar_type(ctx, type->e.numeric.type); *index_ptr = 0; return index; @@ -490,7 +517,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, unsigned int y = index / type->dimx, x = index % type->dimx; bool row_major = hlsl_type_is_row_major(type); - assert(index < type->dimx * type->dimy); + VKD3D_ASSERT(index < type->dimx * type->dimy); *type_ptr = hlsl_get_vector_type(ctx, type->e.numeric.type, row_major ? type->dimx : type->dimy); *index_ptr = row_major ? x : y; return row_major ? 
y : x; @@ -504,7 +531,7 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, *type_ptr = type->e.array.type; *index_ptr = index % elem_comp_count; array_index = index / elem_comp_count; - assert(array_index < type->e.array.elements_count); + VKD3D_ASSERT(array_index < type->e.array.elements_count); return array_index; } @@ -528,6 +555,12 @@ static unsigned int traverse_path_from_component_index(struct hlsl_ctx *ctx, vkd3d_unreachable(); } + case HLSL_CLASS_CONSTANT_BUFFER: + { + *type_ptr = type->e.resource.format; + return traverse_path_from_component_index(ctx, type_ptr, index_ptr); + } + default: vkd3d_unreachable(); } @@ -556,12 +589,14 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty switch (type->class) { - case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: - case HLSL_CLASS_MATRIX: offset[HLSL_REGSET_NUMERIC] += idx; break; + case HLSL_CLASS_MATRIX: + offset[HLSL_REGSET_NUMERIC] += 4 * idx; + break; + case HLSL_CLASS_STRUCT: for (r = 0; r <= HLSL_REGSET_LAST; ++r) offset[r] += type->e.record.fields[idx].reg_offset[r]; @@ -577,21 +612,31 @@ unsigned int hlsl_type_get_component_offset(struct hlsl_ctx *ctx, struct hlsl_ty } break; + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: case HLSL_CLASS_TEXTURE: case HLSL_CLASS_UAV: case HLSL_CLASS_VERTEX_SHADER: - assert(idx == 0); + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + VKD3D_ASSERT(idx == 0); break; case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_NULL: vkd3d_unreachable(); } type = next_type; @@ -638,9 +683,9 @@ bool 
hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d deref->rel_offset.node = NULL; deref->const_offset = 0; - assert(chain); + VKD3D_ASSERT(chain); if (chain->type == HLSL_IR_INDEX) - assert(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); + VKD3D_ASSERT(!hlsl_index_is_noncontiguous(hlsl_ir_index(chain))); /* Find the length of the index chain */ chain_len = 0; @@ -687,7 +732,7 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d chain_len++; ptr = index->val.node; } - assert(deref->path_len == load->src.path_len + chain_len); + VKD3D_ASSERT(deref->path_len == load->src.path_len + chain_len); return true; } @@ -697,7 +742,7 @@ struct hlsl_type *hlsl_deref_get_type(struct hlsl_ctx *ctx, const struct hlsl_de struct hlsl_type *type; unsigned int i; - assert(deref); + VKD3D_ASSERT(deref); if (hlsl_deref_is_lowered(deref)) return deref->data_type; @@ -752,7 +797,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl hlsl_src_from_node(&deref->path[deref_path_len++], c); } - assert(deref_path_len == deref->path_len); + VKD3D_ASSERT(deref_path_len == deref->path_len); return true; } @@ -760,7 +805,7 @@ static bool init_deref_from_component_index(struct hlsl_ctx *ctx, struct hlsl_bl struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, const struct hlsl_type *type, struct hlsl_ir_node *idx) { - assert(idx); + VKD3D_ASSERT(idx); switch (type->class) { @@ -780,7 +825,7 @@ struct hlsl_type *hlsl_get_element_type_from_path_index(struct hlsl_ctx *ctx, co { struct hlsl_ir_constant *c = hlsl_ir_constant(idx); - assert(c->value.u[0].u < type->e.record.field_count); + VKD3D_ASSERT(c->value.u[0].u < type->e.record.field_count); return type->e.record.fields[c->value.u[0].u].type; } @@ -865,6 +910,20 @@ struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim return type; } +struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type 
*format) +{ + struct hlsl_type *type; + + if (!(type = hlsl_alloc(ctx, sizeof(*type)))) + return NULL; + type->class = HLSL_CLASS_CONSTANT_BUFFER; + type->dimy = 1; + type->e.resource.format = format; + hlsl_type_calculate_reg_size(ctx, type); + list_add_tail(&ctx->types, &type->entry); + return type; +} + static const char * get_case_insensitive_typename(const char *name) { static const char *const names[] = @@ -876,6 +935,7 @@ static const char * get_case_insensitive_typename(const char *name) "texture", "vector", "vertexshader", + "string", }; unsigned int i; @@ -956,14 +1016,25 @@ unsigned int hlsl_type_component_count(const struct hlsl_type *type) case HLSL_CLASS_ARRAY: return hlsl_type_component_count(type->e.array.type) * type->e.array.elements_count; + case HLSL_CLASS_CONSTANT_BUFFER: + return hlsl_type_component_count(type->e.resource.format); + + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: case HLSL_CLASS_TEXTURE: case HLSL_CLASS_UAV: case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: return 1; case HLSL_CLASS_EFFECT_GROUP: @@ -1038,14 +1109,25 @@ bool hlsl_types_are_equal(const struct hlsl_type *t1, const struct hlsl_type *t2 case HLSL_CLASS_TECHNIQUE: return t1->e.version == t2->e.version; + case HLSL_CLASS_CONSTANT_BUFFER: + return hlsl_types_are_equal(t1->e.resource.format, t2->e.resource.format); + + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_STRING: case HLSL_CLASS_VERTEX_SHADER: case HLSL_CLASS_VOID: + case 
HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: return true; } @@ -1247,6 +1329,7 @@ struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const cha list_add_tail(&ctx->dummy_scope->vars, &var->scope_entry); else list_add_tail(&ctx->globals->vars, &var->scope_entry); + var->is_synthetic = true; } return var; } @@ -1265,7 +1348,7 @@ bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struc if (!other) return true; - assert(!hlsl_deref_is_lowered(other)); + VKD3D_ASSERT(!hlsl_deref_is_lowered(other)); if (!init_deref(ctx, deref, other->var, other->path_len)) return false; @@ -1322,8 +1405,8 @@ struct hlsl_ir_node *hlsl_new_store_index(struct hlsl_ctx *ctx, const struct hls struct hlsl_ir_store *store; unsigned int i; - assert(lhs); - assert(!hlsl_deref_is_lowered(lhs)); + VKD3D_ASSERT(lhs); + VKD3D_ASSERT(!hlsl_deref_is_lowered(lhs)); if (!(store = hlsl_alloc(ctx, sizeof(*store)))) return NULL; @@ -1394,7 +1477,7 @@ struct hlsl_ir_node *hlsl_new_constant(struct hlsl_ctx *ctx, struct hlsl_type *t { struct hlsl_ir_constant *c; - assert(type->class <= HLSL_CLASS_VECTOR); + VKD3D_ASSERT(type->class <= HLSL_CLASS_VECTOR || type->class == HLSL_CLASS_NULL); if (!(c = hlsl_alloc(ctx, sizeof(*c)))) return NULL; @@ -1439,6 +1522,30 @@ struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n return hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), &value, loc); } +struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_string_constant *s; + + if (!(s = hlsl_alloc(ctx, sizeof(*s)))) + return NULL; + + init_node(&s->node, HLSL_IR_STRING_CONSTANT, ctx->builtin_types.string, loc); + + if (!(s->string = hlsl_strdup(ctx, str))) + { + hlsl_free_instr(&s->node); + return NULL; + } + 
return &s->node; +} + +struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc) +{ + struct hlsl_constant_value value = { 0 }; + return hlsl_new_constant(ctx, ctx->builtin_types.null, &value, loc); +} + struct hlsl_ir_node *hlsl_new_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS], struct hlsl_type *data_type, const struct vkd3d_shader_location *loc) @@ -1468,7 +1575,7 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2}; - assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -1477,8 +1584,8 @@ struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_ex { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; - assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); - assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); + VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + VKD3D_ASSERT(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } @@ -1540,7 +1647,7 @@ struct hlsl_ir_load *hlsl_new_load_index(struct hlsl_ctx *ctx, const struct hlsl struct hlsl_type *type; unsigned int i; - assert(!hlsl_deref_is_lowered(deref)); + VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); type = hlsl_deref_get_type(ctx, deref); if (idx) @@ -1569,7 +1676,7 @@ struct hlsl_ir_load *hlsl_new_load_parent(struct hlsl_ctx *ctx, const struct hls /* This deref can only exists temporarily because it is not the real owner of its members. 
*/ struct hlsl_deref tmp_deref; - assert(deref->path_len >= 1); + VKD3D_ASSERT(deref->path_len >= 1); tmp_deref = *deref; tmp_deref.path_len = deref->path_len - 1; @@ -1674,7 +1781,7 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned if (!(swizzle = hlsl_alloc(ctx, sizeof(*swizzle)))) return NULL; - assert(hlsl_is_numeric_type(val->data_type)); + VKD3D_ASSERT(hlsl_is_numeric_type(val->data_type)); if (components == 1) type = hlsl_get_scalar_type(ctx, val->data_type->e.numeric.type); else @@ -1765,7 +1872,8 @@ struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type } struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, const struct vkd3d_shader_location *loc) + struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, + unsigned int unroll_limit, const struct vkd3d_shader_location *loc) { struct hlsl_ir_loop *loop; @@ -1774,6 +1882,9 @@ struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, init_node(&loop->node, HLSL_IR_LOOP, NULL, loc); hlsl_block_init(&loop->body); hlsl_block_add_block(&loop->body, block); + + loop->unroll_type = unroll_type; + loop->unroll_limit = unroll_limit; return &loop->node; } @@ -1836,9 +1947,7 @@ static struct hlsl_ir_node *map_instr(const struct clone_instr_map *map, struct return map->instrs[i].dst; } - /* The block passed to hlsl_clone_block() should have been free of external - * references. 
*/ - vkd3d_unreachable(); + return src; } static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, @@ -1846,7 +1955,7 @@ static bool clone_deref(struct hlsl_ctx *ctx, struct clone_instr_map *map, { unsigned int i; - assert(!hlsl_deref_is_lowered(src)); + VKD3D_ASSERT(!hlsl_deref_is_lowered(src)); if (!init_deref(ctx, dst, src->var, src->path_len)) return false; @@ -1935,7 +2044,7 @@ static struct hlsl_ir_node *clone_loop(struct hlsl_ctx *ctx, struct clone_instr_ if (!clone_block(ctx, &body, &src->body, map)) return NULL; - if (!(dst = hlsl_new_loop(ctx, &body, &src->node.loc))) + if (!(dst = hlsl_new_loop(ctx, &body, src->unroll_type, src->unroll_limit, &src->node.loc))) { hlsl_block_cleanup(&body); return NULL; @@ -1992,6 +2101,11 @@ static struct hlsl_ir_node *clone_resource_store(struct hlsl_ctx *ctx, return &dst->node; } +static struct hlsl_ir_node *clone_string_constant(struct hlsl_ctx *ctx, struct hlsl_ir_string_constant *src) +{ + return hlsl_new_string_constant(ctx, src->string, &src->node.loc); +} + static struct hlsl_ir_node *clone_store(struct hlsl_ctx *ctx, struct clone_instr_map *map, struct hlsl_ir_store *src) { struct hlsl_ir_store *dst; @@ -2034,6 +2148,43 @@ static struct hlsl_ir_node *clone_stateblock_constant(struct hlsl_ctx *ctx, return hlsl_new_stateblock_constant(ctx, constant->name, &constant->node.loc); } +struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, + struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, + unsigned int lhs_index, unsigned int arg_index) +{ + struct hlsl_state_block_entry *entry; + struct clone_instr_map map = { 0 }; + + if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) + return NULL; + entry->name = hlsl_strdup(ctx, name); + entry->lhs_has_index = lhs_has_index; + entry->lhs_index = lhs_index; + if (!(entry->instrs = hlsl_alloc(ctx, sizeof(*entry->instrs)))) + { + hlsl_free_state_block_entry(entry); + return NULL; + } + + entry->args_count = 1; + if 
(!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) + { + hlsl_free_state_block_entry(entry); + return NULL; + } + + hlsl_block_init(entry->instrs); + if (!clone_block(ctx, entry->instrs, src->instrs, &map)) + { + hlsl_free_state_block_entry(entry); + return NULL; + } + clone_src(&map, entry->args, &src->args[arg_index]); + vkd3d_free(map.instrs); + + return entry; +} + void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c) { hlsl_block_cleanup(&c->body); @@ -2121,6 +2272,9 @@ static struct hlsl_ir_node *clone_instr(struct hlsl_ctx *ctx, case HLSL_IR_RESOURCE_STORE: return clone_resource_store(ctx, map, hlsl_ir_resource_store(instr)); + case HLSL_IR_STRING_CONSTANT: + return clone_string_constant(ctx, hlsl_ir_string_constant(instr)); + case HLSL_IR_STORE: return clone_store(ctx, map, hlsl_ir_store(instr)); @@ -2249,7 +2403,7 @@ void hlsl_pop_scope(struct hlsl_ctx *ctx) { struct hlsl_scope *prev_scope = ctx->cur_scope->upper; - assert(prev_scope); + VKD3D_ASSERT(prev_scope); TRACE("Popping current scope.\n"); ctx->cur_scope = prev_scope; } @@ -2327,17 +2481,17 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru switch (type->class) { case HLSL_CLASS_SCALAR: - assert(type->e.numeric.type < ARRAY_SIZE(base_types)); + VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); vkd3d_string_buffer_printf(string, "%s", base_types[type->e.numeric.type]); return string; case HLSL_CLASS_VECTOR: - assert(type->e.numeric.type < ARRAY_SIZE(base_types)); + VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); vkd3d_string_buffer_printf(string, "%s%u", base_types[type->e.numeric.type], type->dimx); return string; case HLSL_CLASS_MATRIX: - assert(type->e.numeric.type < ARRAY_SIZE(base_types)); + VKD3D_ASSERT(type->e.numeric.type < ARRAY_SIZE(base_types)); vkd3d_string_buffer_printf(string, "%s%ux%u", base_types[type->e.numeric.type], type->dimy, type->dimx); return string; @@ -2375,15 +2529,15 @@ struct 
vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru return string; } - assert(hlsl_is_numeric_type(type->e.resource.format)); - assert(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); + VKD3D_ASSERT(hlsl_is_numeric_type(type->e.resource.format)); + VKD3D_ASSERT(type->e.resource.format->e.numeric.type < ARRAY_SIZE(base_types)); if (type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER) { vkd3d_string_buffer_printf(string, "Buffer"); } else { - assert(type->sampler_dim < ARRAY_SIZE(dimensions)); + VKD3D_ASSERT(type->sampler_dim < ARRAY_SIZE(dimensions)); vkd3d_string_buffer_printf(string, "Texture%s", dimensions[type->sampler_dim]); } if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) @@ -2407,16 +2561,33 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru } return string; + case HLSL_CLASS_CONSTANT_BUFFER: + vkd3d_string_buffer_printf(string, "ConstantBuffer"); + if ((inner_string = hlsl_type_to_string(ctx, type->e.resource.format))) + { + vkd3d_string_buffer_printf(string, "<%s>", inner_string->buffer); + hlsl_release_string_buffer(ctx, inner_string); + } + return string; + + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VERTEX_SHADER: case HLSL_CLASS_VOID: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: break; } @@ -2513,19 +2684,21 @@ const char *hlsl_node_type_to_string(enum hlsl_ir_node_type type) { static const char * const names[] = { - [HLSL_IR_CALL ] = "HLSL_IR_CALL", - [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", - [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", - [HLSL_IR_IF ] = 
"HLSL_IR_IF", - [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", - [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", - [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", - [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", - [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", - [HLSL_IR_RESOURCE_STORE] = "HLSL_IR_RESOURCE_STORE", - [HLSL_IR_STORE ] = "HLSL_IR_STORE", - [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", - [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + [HLSL_IR_CALL ] = "HLSL_IR_CALL", + [HLSL_IR_CONSTANT ] = "HLSL_IR_CONSTANT", + [HLSL_IR_EXPR ] = "HLSL_IR_EXPR", + [HLSL_IR_IF ] = "HLSL_IR_IF", + [HLSL_IR_INDEX ] = "HLSL_IR_INDEX", + [HLSL_IR_LOAD ] = "HLSL_IR_LOAD", + [HLSL_IR_LOOP ] = "HLSL_IR_LOOP", + [HLSL_IR_JUMP ] = "HLSL_IR_JUMP", + [HLSL_IR_RESOURCE_LOAD ] = "HLSL_IR_RESOURCE_LOAD", + [HLSL_IR_RESOURCE_STORE ] = "HLSL_IR_RESOURCE_STORE", + [HLSL_IR_STRING_CONSTANT] = "HLSL_IR_STRING_CONSTANT", + [HLSL_IR_STORE ] = "HLSL_IR_STORE", + [HLSL_IR_SWITCH ] = "HLSL_IR_SWITCH", + [HLSL_IR_SWIZZLE ] = "HLSL_IR_SWIZZLE", + [HLSL_IR_STATEBLOCK_CONSTANT] = "HLSL_IR_STATEBLOCK_CONSTANT", }; if (type >= ARRAY_SIZE(names)) @@ -2544,7 +2717,7 @@ const char *hlsl_jump_type_to_string(enum hlsl_ir_jump_type type) [HLSL_IR_JUMP_RETURN] = "HLSL_IR_JUMP_RETURN", }; - assert(type < ARRAY_SIZE(names)); + VKD3D_ASSERT(type < ARRAY_SIZE(names)); return names[type]; } @@ -2634,7 +2807,7 @@ const char *debug_hlsl_writemask(unsigned int writemask) char string[5]; unsigned int i = 0, pos = 0; - assert(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); + VKD3D_ASSERT(!(writemask & ~VKD3DSP_WRITEMASK_ALL)); while (writemask) { @@ -2653,7 +2826,7 @@ const char *debug_hlsl_swizzle(uint32_t swizzle, unsigned int size) char string[5]; unsigned int i; - assert(size <= ARRAY_SIZE(components)); + VKD3D_ASSERT(size <= ARRAY_SIZE(components)); for (i = 0; i < size; ++i) string[i] = components[hlsl_swizzle_get_component(swizzle, i)]; string[size] = 0; @@ -2735,6 +2908,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) static const char *const op_names[] = { 
[HLSL_OP0_VOID] = "void", + [HLSL_OP0_RASTERIZER_SAMPLE_COUNT] = "GetRenderTargetSampleCount", [HLSL_OP1_ABS] = "abs", [HLSL_OP1_BIT_NOT] = "~", @@ -2749,6 +2923,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP1_DSY_COARSE] = "dsy_coarse", [HLSL_OP1_DSY_FINE] = "dsy_fine", [HLSL_OP1_EXP2] = "exp2", + [HLSL_OP1_F16TOF32] = "f16tof32", [HLSL_OP1_FLOOR] = "floor", [HLSL_OP1_FRACT] = "fract", [HLSL_OP1_LOG2] = "log2", @@ -2790,6 +2965,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP3_CMP] = "cmp", [HLSL_OP3_DP2ADD] = "dp2add", [HLSL_OP3_TERNARY] = "ternary", + [HLSL_OP3_MAD] = "mad", }; return op_names[op]; @@ -2875,7 +3051,7 @@ static void dump_ir_resource_load(struct vkd3d_string_buffer *buffer, const stru [HLSL_RESOURCE_RESINFO] = "resinfo", }; - assert(load->load_type < ARRAY_SIZE(type_names)); + VKD3D_ASSERT(load->load_type < ARRAY_SIZE(type_names)); vkd3d_string_buffer_printf(buffer, "%s(resource = ", type_names[load->load_type]); dump_deref(buffer, &load->resource); vkd3d_string_buffer_printf(buffer, ", sampler = "); @@ -2929,6 +3105,11 @@ static void dump_ir_resource_store(struct vkd3d_string_buffer *buffer, const str vkd3d_string_buffer_printf(buffer, ")"); } +static void dump_ir_string(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_string_constant *string) +{ + vkd3d_string_buffer_printf(buffer, "\"%s\"", debugstr_a(string->string)); +} + static void dump_ir_store(struct vkd3d_string_buffer *buffer, const struct hlsl_ir_store *store) { vkd3d_string_buffer_printf(buffer, "= ("); @@ -3048,6 +3229,10 @@ static void dump_instr(struct hlsl_ctx *ctx, struct vkd3d_string_buffer *buffer, dump_ir_resource_store(buffer, hlsl_ir_resource_store(instr)); break; + case HLSL_IR_STRING_CONSTANT: + dump_ir_string(buffer, hlsl_ir_string_constant(instr)); + break; + case HLSL_IR_STORE: dump_ir_store(buffer, hlsl_ir_store(instr)); break; @@ -3086,12 +3271,45 @@ void hlsl_dump_function(struct hlsl_ctx *ctx, const struct 
hlsl_ir_function_decl vkd3d_string_buffer_cleanup(&buffer); } +void hlsl_dump_var_default_values(const struct hlsl_ir_var *var) +{ + unsigned int k, component_count = hlsl_type_component_count(var->data_type); + struct vkd3d_string_buffer buffer; + + vkd3d_string_buffer_init(&buffer); + if (!var->default_values) + { + vkd3d_string_buffer_printf(&buffer, "var \"%s\" has no default values.\n", var->name); + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); + return; + } + + vkd3d_string_buffer_printf(&buffer, "var \"%s\" default values:", var->name); + for (k = 0; k < component_count; ++k) + { + bool is_string = var->default_values[k].string; + + if (k % 4 == 0 || is_string) + vkd3d_string_buffer_printf(&buffer, "\n "); + + if (is_string) + vkd3d_string_buffer_printf(&buffer, " %s", debugstr_a(var->default_values[k].string)); + else + vkd3d_string_buffer_printf(&buffer, " 0x%08x", var->default_values[k].number.u); + } + vkd3d_string_buffer_printf(&buffer, "\n"); + + vkd3d_string_buffer_trace(&buffer); + vkd3d_string_buffer_cleanup(&buffer); +} + void hlsl_replace_node(struct hlsl_ir_node *old, struct hlsl_ir_node *new) { struct hlsl_src *src, *next; - assert(old->data_type->dimx == new->data_type->dimx); - assert(old->data_type->dimy == new->data_type->dimy); + VKD3D_ASSERT(old->data_type->dimx == new->data_type->dimx); + VKD3D_ASSERT(old->data_type->dimy == new->data_type->dimy); LIST_FOR_EACH_ENTRY_SAFE(src, next, &old->uses, struct hlsl_src, entry) { @@ -3199,6 +3417,12 @@ static void free_ir_resource_load(struct hlsl_ir_resource_load *load) vkd3d_free(load); } +static void free_ir_string_constant(struct hlsl_ir_string_constant *string) +{ + vkd3d_free(string->string); + vkd3d_free(string); +} + static void free_ir_resource_store(struct hlsl_ir_resource_store *store) { hlsl_cleanup_deref(&store->resource); @@ -3243,7 +3467,7 @@ static void free_ir_stateblock_constant(struct hlsl_ir_stateblock_constant *cons void hlsl_free_instr(struct 
hlsl_ir_node *node) { - assert(list_empty(&node->uses)); + VKD3D_ASSERT(list_empty(&node->uses)); switch (node->type) { @@ -3283,6 +3507,10 @@ void hlsl_free_instr(struct hlsl_ir_node *node) free_ir_resource_load(hlsl_ir_resource_load(node)); break; + case HLSL_IR_STRING_CONSTANT: + free_ir_string_constant(hlsl_ir_string_constant(node)); + break; + case HLSL_IR_RESOURCE_STORE: free_ir_resource_store(hlsl_ir_resource_store(node)); break; @@ -3319,9 +3547,25 @@ void hlsl_free_attribute(struct hlsl_attribute *attr) void hlsl_cleanup_semantic(struct hlsl_semantic *semantic) { vkd3d_free((void *)semantic->name); + vkd3d_free((void *)semantic->raw_name); memset(semantic, 0, sizeof(*semantic)); } +bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src) +{ + *dst = *src; + dst->name = dst->raw_name = NULL; + if (src->name && !(dst->name = hlsl_strdup(ctx, src->name))) + return false; + if (src->raw_name && !(dst->raw_name = hlsl_strdup(ctx, src->raw_name))) + { + hlsl_cleanup_semantic(dst); + return false; + } + + return true; +} + static void free_function_decl(struct hlsl_ir_function_decl *decl) { unsigned int i; @@ -3711,14 +3955,23 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) } ctx->builtin_types.Void = hlsl_new_simple_type(ctx, "void", HLSL_CLASS_VOID); + ctx->builtin_types.null = hlsl_new_type(ctx, "NULL", HLSL_CLASS_NULL, HLSL_TYPE_UINT, 1, 1); + ctx->builtin_types.string = hlsl_new_simple_type(ctx, "string", HLSL_CLASS_STRING); + hlsl_scope_add_type(ctx->globals, ctx->builtin_types.string); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilView", HLSL_CLASS_DEPTH_STENCIL_VIEW)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DepthStencilState", HLSL_CLASS_DEPTH_STENCIL_STATE)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "fxgroup", HLSL_CLASS_EFFECT_GROUP)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pass", HLSL_CLASS_PASS)); 
hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "pixelshader", HLSL_CLASS_PIXEL_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RasterizerState", HLSL_CLASS_RASTERIZER_STATE)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "RenderTargetView", HLSL_CLASS_RENDER_TARGET_VIEW)); - hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "STRING", HLSL_CLASS_STRING)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "texture", HLSL_CLASS_TEXTURE)); hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "vertexshader", HLSL_CLASS_VERTEX_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "ComputeShader", HLSL_CLASS_COMPUTE_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "DomainShader", HLSL_CLASS_DOMAIN_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "HullShader", HLSL_CLASS_HULL_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "GeometryShader", HLSL_CLASS_GEOMETRY_SHADER)); + hlsl_scope_add_type(ctx->globals, hlsl_new_simple_type(ctx, "BlendState", HLSL_CLASS_BLEND_STATE)); for (i = 0; i < ARRAY_SIZE(effect_types); ++i) { @@ -4049,6 +4302,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct /* Save and restore everything that matters. * Note that saving the scope stack is hard, and shouldn't be necessary. 
*/ + hlsl_push_scope(ctx); ctx->scanner = NULL; ctx->internal_func_name = internal_name->buffer; ctx->cur_function = NULL; @@ -4056,6 +4310,7 @@ struct hlsl_ir_function_decl *hlsl_compile_internal_function(struct hlsl_ctx *ct ctx->scanner = saved_scanner; ctx->internal_func_name = saved_internal_func_name; ctx->cur_function = saved_cur_function; + hlsl_pop_scope(ctx); if (ret) { ERR("Failed to compile intrinsic, error %u.\n", ret); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index 27814f3a56f..22e25b23988 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -78,10 +78,12 @@ enum hlsl_type_class HLSL_CLASS_LAST_NUMERIC = HLSL_CLASS_MATRIX, HLSL_CLASS_STRUCT, HLSL_CLASS_ARRAY, + HLSL_CLASS_DEPTH_STENCIL_STATE, HLSL_CLASS_DEPTH_STENCIL_VIEW, HLSL_CLASS_EFFECT_GROUP, HLSL_CLASS_PASS, HLSL_CLASS_PIXEL_SHADER, + HLSL_CLASS_RASTERIZER_STATE, HLSL_CLASS_RENDER_TARGET_VIEW, HLSL_CLASS_SAMPLER, HLSL_CLASS_STRING, @@ -89,7 +91,14 @@ enum hlsl_type_class HLSL_CLASS_TEXTURE, HLSL_CLASS_UAV, HLSL_CLASS_VERTEX_SHADER, + HLSL_CLASS_COMPUTE_SHADER, + HLSL_CLASS_DOMAIN_SHADER, + HLSL_CLASS_HULL_SHADER, + HLSL_CLASS_GEOMETRY_SHADER, + HLSL_CLASS_CONSTANT_BUFFER, + HLSL_CLASS_BLEND_STATE, HLSL_CLASS_VOID, + HLSL_CLASS_NULL, }; enum hlsl_base_type @@ -222,6 +231,8 @@ struct hlsl_semantic const char *name; uint32_t index; + /* Name exactly as it appears in the sources. */ + const char *raw_name; /* If the variable or field that stores this hlsl_semantic has already reported that it is missing. */ bool reported_missing; /* In case the variable or field that stores this semantic has already reported to use a @@ -259,8 +270,20 @@ struct hlsl_struct_field * struct. */ struct hlsl_reg { - /* Index of the first register allocated. */ + /* Register number of the first register allocated. */ uint32_t id; + /* For descriptors (buffer, texture, sampler, UAV) this is the base binding + * index of the descriptor. 
+ * For 5.1 and above descriptors have space and may be arrayed, in which + * case the array shares a single register ID but has a range of register + * indices, and "id" and "index" are as a rule not equal. + * For versions below 5.1, the register number for descriptors is the same + * as its external binding index, so only "index" is used, and "id" is + * ignored. + * For numeric registers "index" is not used. */ + uint32_t index; + /* Register space of a descriptor. Not used for numeric registers. */ + uint32_t space; /* Number of registers to be allocated. * Unlike the variable's type's regsize, it is not expressed in register components, but rather * in whole registers, and may depend on which components are used within the shader. */ @@ -289,6 +312,7 @@ enum hlsl_ir_node_type HLSL_IR_JUMP, HLSL_IR_RESOURCE_LOAD, HLSL_IR_RESOURCE_STORE, + HLSL_IR_STRING_CONSTANT, HLSL_IR_STORE, HLSL_IR_SWIZZLE, HLSL_IR_SWITCH, @@ -371,6 +395,7 @@ struct hlsl_attribute #define HLSL_STORAGE_LINEAR 0x00010000 #define HLSL_MODIFIER_SINGLE 0x00020000 #define HLSL_MODIFIER_EXPORT 0x00040000 +#define HLSL_STORAGE_ANNOTATION 0x00080000 #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ @@ -385,7 +410,7 @@ struct hlsl_attribute /* Reservation of a register and/or an offset for objects inside constant buffers, to be used as a * starting point of their allocation. They are available through the register(·) and the - * packoffset(·) syntaxes, respectivelly. + * packoffset(·) syntaxes, respectively. * The constant buffer offset is measured register components. */ struct hlsl_reg_reservation { @@ -396,6 +421,14 @@ struct hlsl_reg_reservation unsigned int offset_index; }; +union hlsl_constant_value_component +{ + uint32_t u; + int32_t i; + float f; + double d; +}; + struct hlsl_ir_var { struct hlsl_type *data_type; @@ -418,6 +451,17 @@ struct hlsl_ir_var /* Scope that contains annotations for this variable. 
*/ struct hlsl_scope *annotations; + /* Array of default values the variable was initialized with, one for each component. + * Only for variables that need it, such as uniforms and variables inside constant buffers. + * This pointer is NULL for others. */ + struct hlsl_default_value + { + /* Default value, in case the component is a string, otherwise it is NULL. */ + const char *string; + /* Default value, in case the component is a numeric value. */ + union hlsl_constant_value_component number; + } *default_values; + /* A dynamic array containing the state block on the variable's declaration, if any. * An array variable may contain multiple state blocks. * A technique pass will always contain one. @@ -460,6 +504,8 @@ struct hlsl_ir_var uint32_t is_uniform : 1; uint32_t is_param : 1; uint32_t is_separated_resource : 1; + uint32_t is_synthetic : 1; + uint32_t has_explicit_bind_point : 1; }; /* This struct is used to represent assignments in state block entries: @@ -470,22 +516,31 @@ struct hlsl_ir_var * name[lhs_index] = args[0] * - or - * name[lhs_index] = {args[0], args[1], ...}; + * + * This struct also represents function call syntax: + * name(args[0], args[1], ...) */ struct hlsl_state_block_entry { - /* For assignments, the name in the lhs. */ + /* Whether this entry is a function call. */ + bool is_function_call; + + /* For assignments, the name in the lhs. + * For functions, the name of the function. */ char *name; /* Resolved format-specific property identifier. */ unsigned int name_id; - /* Whether the lhs in the assignment is indexed and, in that case, its index. */ + /* For assignments, whether the lhs of an assignment is indexed and, in + * that case, its index. */ bool lhs_has_index; unsigned int lhs_index; - /* Instructions present in the rhs. */ + /* Instructions present in the rhs or the function arguments. */ struct hlsl_block *instrs; - /* For assignments, arguments of the rhs initializer. */ + /* For assignments, arguments of the rhs initializer. 
+ * For function calls, the arguments themselves. */ struct hlsl_src *args; unsigned int args_count; }; @@ -556,12 +611,21 @@ struct hlsl_ir_if struct hlsl_block else_block; }; +enum hlsl_ir_loop_unroll_type +{ + HLSL_IR_LOOP_UNROLL, + HLSL_IR_LOOP_FORCE_UNROLL, + HLSL_IR_LOOP_FORCE_LOOP +}; + struct hlsl_ir_loop { struct hlsl_ir_node node; /* loop condition is stored in the body (as "if (!condition) break;") */ struct hlsl_block body; unsigned int next_index; /* liveness index of the end of the loop */ + unsigned int unroll_limit; + enum hlsl_ir_loop_unroll_type unroll_type; }; struct hlsl_ir_switch_case @@ -583,13 +647,14 @@ struct hlsl_ir_switch enum hlsl_ir_expr_op { HLSL_OP0_VOID, + HLSL_OP0_RASTERIZER_SAMPLE_COUNT, HLSL_OP1_ABS, HLSL_OP1_BIT_NOT, HLSL_OP1_CAST, HLSL_OP1_CEIL, HLSL_OP1_COS, - HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi] */ + HLSL_OP1_COS_REDUCED, /* Reduced range [-pi, pi], writes to .x */ HLSL_OP1_DSX, HLSL_OP1_DSX_COARSE, HLSL_OP1_DSX_FINE, @@ -597,6 +662,7 @@ enum hlsl_ir_expr_op HLSL_OP1_DSY_COARSE, HLSL_OP1_DSY_FINE, HLSL_OP1_EXP2, + HLSL_OP1_F16TOF32, HLSL_OP1_FLOOR, HLSL_OP1_FRACT, HLSL_OP1_LOG2, @@ -610,7 +676,7 @@ enum hlsl_ir_expr_op HLSL_OP1_SAT, HLSL_OP1_SIGN, HLSL_OP1_SIN, - HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi] */ + HLSL_OP1_SIN_REDUCED, /* Reduced range [-pi, pi], writes to .y */ HLSL_OP1_SQRT, HLSL_OP1_TRUNC, @@ -643,6 +709,7 @@ enum hlsl_ir_expr_op * CMP(a, b, c) returns 'b' if 'a' >= 0, and 'c' otherwise. It's used only for SM1-SM3 targets. */ HLSL_OP3_CMP, HLSL_OP3_TERNARY, + HLSL_OP3_MAD, }; #define HLSL_MAX_OPERANDS 3 @@ -775,18 +842,18 @@ struct hlsl_ir_constant struct hlsl_ir_node node; struct hlsl_constant_value { - union hlsl_constant_value_component - { - uint32_t u; - int32_t i; - float f; - double d; - } u[4]; + union hlsl_constant_value_component u[4]; } value; /* Constant register of type 'c' where the constant value is stored for SM1. 
*/ struct hlsl_reg reg; }; +struct hlsl_ir_string_constant +{ + struct hlsl_ir_node node; + char *string; +}; + /* Stateblock constants are undeclared values found on state blocks or technique passes descriptions, * that do not concern regular pixel, vertex, or compute shaders, except for parsing. */ struct hlsl_ir_stateblock_constant @@ -811,6 +878,8 @@ struct hlsl_scope bool loop; /* The scope was created for the switch statement. */ bool _switch; + /* The scope contains annotation variables. */ + bool annotations; }; struct hlsl_profile_info @@ -931,7 +1000,9 @@ struct hlsl_ctx /* matrix[HLSL_TYPE_FLOAT][1][3] is a float4x2, i.e. dimx = 2, dimy = 4 */ struct hlsl_type *matrix[HLSL_TYPE_LAST_SCALAR + 1][4][4]; struct hlsl_type *sampler[HLSL_SAMPLER_DIM_LAST_SAMPLER + 1]; + struct hlsl_type *string; struct hlsl_type *Void; + struct hlsl_type *null; } builtin_types; /* List of the instruction nodes for initializing static variables. */ @@ -948,6 +1019,8 @@ struct hlsl_ctx } *regs; size_t count, size; } constant_defs; + /* 'c' registers where the constants expected by SM2 sincos are stored. */ + struct hlsl_reg d3dsincosconst1, d3dsincosconst2; /* Number of temp. registers required for the shader to run, i.e. the largest temp register * index that will be used in the output bytecode (+1). 
*/ uint32_t temp_count; @@ -994,85 +1067,91 @@ struct hlsl_resource_load_params static inline struct hlsl_ir_call *hlsl_ir_call(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_CALL); + VKD3D_ASSERT(node->type == HLSL_IR_CALL); return CONTAINING_RECORD(node, struct hlsl_ir_call, node); } static inline struct hlsl_ir_constant *hlsl_ir_constant(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_CONSTANT); + VKD3D_ASSERT(node->type == HLSL_IR_CONSTANT); return CONTAINING_RECORD(node, struct hlsl_ir_constant, node); } +static inline struct hlsl_ir_string_constant *hlsl_ir_string_constant(const struct hlsl_ir_node *node) +{ + VKD3D_ASSERT(node->type == HLSL_IR_STRING_CONSTANT); + return CONTAINING_RECORD(node, struct hlsl_ir_string_constant, node); +} + static inline struct hlsl_ir_expr *hlsl_ir_expr(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_EXPR); + VKD3D_ASSERT(node->type == HLSL_IR_EXPR); return CONTAINING_RECORD(node, struct hlsl_ir_expr, node); } static inline struct hlsl_ir_if *hlsl_ir_if(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_IF); + VKD3D_ASSERT(node->type == HLSL_IR_IF); return CONTAINING_RECORD(node, struct hlsl_ir_if, node); } static inline struct hlsl_ir_jump *hlsl_ir_jump(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_JUMP); + VKD3D_ASSERT(node->type == HLSL_IR_JUMP); return CONTAINING_RECORD(node, struct hlsl_ir_jump, node); } static inline struct hlsl_ir_load *hlsl_ir_load(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_LOAD); + VKD3D_ASSERT(node->type == HLSL_IR_LOAD); return CONTAINING_RECORD(node, struct hlsl_ir_load, node); } static inline struct hlsl_ir_loop *hlsl_ir_loop(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_LOOP); + VKD3D_ASSERT(node->type == HLSL_IR_LOOP); return CONTAINING_RECORD(node, struct hlsl_ir_loop, node); } static inline struct hlsl_ir_resource_load *hlsl_ir_resource_load(const struct 
hlsl_ir_node *node) { - assert(node->type == HLSL_IR_RESOURCE_LOAD); + VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_LOAD); return CONTAINING_RECORD(node, struct hlsl_ir_resource_load, node); } static inline struct hlsl_ir_resource_store *hlsl_ir_resource_store(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_RESOURCE_STORE); + VKD3D_ASSERT(node->type == HLSL_IR_RESOURCE_STORE); return CONTAINING_RECORD(node, struct hlsl_ir_resource_store, node); } static inline struct hlsl_ir_store *hlsl_ir_store(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_STORE); + VKD3D_ASSERT(node->type == HLSL_IR_STORE); return CONTAINING_RECORD(node, struct hlsl_ir_store, node); } static inline struct hlsl_ir_swizzle *hlsl_ir_swizzle(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_SWIZZLE); + VKD3D_ASSERT(node->type == HLSL_IR_SWIZZLE); return CONTAINING_RECORD(node, struct hlsl_ir_swizzle, node); } static inline struct hlsl_ir_index *hlsl_ir_index(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_INDEX); + VKD3D_ASSERT(node->type == HLSL_IR_INDEX); return CONTAINING_RECORD(node, struct hlsl_ir_index, node); } static inline struct hlsl_ir_switch *hlsl_ir_switch(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_SWITCH); + VKD3D_ASSERT(node->type == HLSL_IR_SWITCH); return CONTAINING_RECORD(node, struct hlsl_ir_switch, node); } static inline struct hlsl_ir_stateblock_constant *hlsl_ir_stateblock_constant(const struct hlsl_ir_node *node) { - assert(node->type == HLSL_IR_STATEBLOCK_CONSTANT); + VKD3D_ASSERT(node->type == HLSL_IR_STATEBLOCK_CONSTANT); return CONTAINING_RECORD(node, struct hlsl_ir_stateblock_constant, node); } @@ -1249,6 +1328,13 @@ void hlsl_block_cleanup(struct hlsl_block *block); bool hlsl_clone_block(struct hlsl_ctx *ctx, struct hlsl_block *dst_block, const struct hlsl_block *src_block); void hlsl_dump_function(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *func); +void 
hlsl_dump_var_default_values(const struct hlsl_ir_var *var); + +bool hlsl_validate_state_block_entry(struct hlsl_ctx *ctx, struct hlsl_state_block_entry *entry, + const struct vkd3d_shader_location *loc); +struct hlsl_state_block_entry *clone_stateblock_entry(struct hlsl_ctx *ctx, + struct hlsl_state_block_entry *src, const char *name, bool lhs_has_index, + unsigned int lhs_index, unsigned int arg_index); void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body); int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, @@ -1259,7 +1345,9 @@ bool hlsl_init_deref_from_index_chain(struct hlsl_ctx *ctx, struct hlsl_deref *d bool hlsl_copy_deref(struct hlsl_ctx *ctx, struct hlsl_deref *deref, const struct hlsl_deref *other); void hlsl_cleanup_deref(struct hlsl_deref *deref); + void hlsl_cleanup_semantic(struct hlsl_semantic *semantic); +bool hlsl_clone_semantic(struct hlsl_ctx *ctx, struct hlsl_semantic *dst, const struct hlsl_semantic *src); void hlsl_cleanup_ir_switch_cases(struct list *cases); void hlsl_free_ir_switch_case(struct hlsl_ir_switch_case *c); @@ -1270,6 +1358,7 @@ void hlsl_free_attribute(struct hlsl_attribute *attr); void hlsl_free_instr(struct hlsl_ir_node *node); void hlsl_free_instr_list(struct list *list); void hlsl_free_state_block(struct hlsl_state_block *state_block); +void hlsl_free_state_block_entry(struct hlsl_state_block_entry *state_block_entry); void hlsl_free_type(struct hlsl_type *type); void hlsl_free_var(struct hlsl_ir_var *decl); @@ -1342,7 +1431,7 @@ bool hlsl_index_chain_has_resource_access(struct hlsl_ir_index *index); struct hlsl_ir_node *hlsl_new_index(struct hlsl_ctx *ctx, struct hlsl_ir_node *val, struct hlsl_ir_node *idx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_loop(struct hlsl_ctx *ctx, - struct hlsl_block *block, const struct vkd3d_shader_location *loc); + struct hlsl_block *block, enum hlsl_ir_loop_unroll_type unroll_type, unsigned int unroll_limit, 
const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_load(struct hlsl_ctx *ctx, const struct hlsl_resource_load_params *params, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_resource_store(struct hlsl_ctx *ctx, const struct hlsl_deref *resource, @@ -1353,6 +1442,8 @@ struct hlsl_ir_node *hlsl_new_swizzle(struct hlsl_ctx *ctx, uint32_t s, unsigned struct hlsl_ir_node *val, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_stateblock_constant(struct hlsl_ctx *ctx, const char *name, struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_string_constant(struct hlsl_ctx *ctx, const char *str, + const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var(struct hlsl_ctx *ctx, const char *template, struct hlsl_type *type, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_synthetic_var_named(struct hlsl_ctx *ctx, const char *name, @@ -1361,8 +1452,10 @@ struct hlsl_type *hlsl_new_texture_type(struct hlsl_ctx *ctx, enum hlsl_sampler_ unsigned int sample_count); struct hlsl_type *hlsl_new_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, struct hlsl_type *format, bool rasteriser_ordered); +struct hlsl_type *hlsl_new_cb_type(struct hlsl_ctx *ctx, struct hlsl_type *format); struct hlsl_ir_node *hlsl_new_uint_constant(struct hlsl_ctx *ctx, unsigned int n, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_null_constant(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_unary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, struct hlsl_ir_node *arg, const struct vkd3d_shader_location *loc); struct hlsl_ir_var *hlsl_new_var(struct hlsl_ctx *ctx, const char *name, struct hlsl_type *type, @@ -1432,10 +1525,16 @@ bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); D3DXPARAMETER_TYPE 
hlsl_sm1_base_type(const struct hlsl_type *type); -bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, - bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); -bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); -int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out); +bool hlsl_sm1_register_from_semantic(const struct vkd3d_shader_version *version, const char *semantic_name, + unsigned int semantic_index, bool output, enum vkd3d_shader_register_type *type, unsigned int *reg); +bool hlsl_sm1_usage_from_semantic(const char *semantic_name, + uint32_t semantic_index, D3DDECLUSAGE *usage, uint32_t *usage_idx); + +void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer); +int d3dbc_compile(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_code *ctab, + struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context, + struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func); bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3D_NAME *usage); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index a5923d8bf8e..0c02b27817e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -29,6 +29,8 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); +static void apply_escape_sequences(char *str); + #define YY_USER_ACTION update_location(yyget_extra(yyscanner), yyget_lloc(yyscanner)); %} @@ -49,11 +51,11 @@ static void update_location(struct hlsl_ctx *ctx, YYLTYPE *loc); RESERVED1 auto|catch|char|class|const_cast|delete|dynamic_cast|enum RESERVED2 explicit|friend|goto|long|mutable|new|operator|private|protected|public 
RESERVED3 reinterpret_cast|short|signed|sizeof|static_cast|template|this|throw|try -RESERVED4 typename|union|unsigned|using|virtual +RESERVED4 typename|union|using|virtual WS [ \t] NEWLINE (\n)|(\r\n) -STRING \"[^\"]*\" +STRING \"([^\"\\]|\\.)*\" IDENTIFIER [A-Za-z_][A-Za-z0-9_]* ANY (.) @@ -105,6 +107,7 @@ matrix {return KW_MATRIX; } namespace {return KW_NAMESPACE; } nointerpolation {return KW_NOINTERPOLATION; } noperspective {return KW_NOPERSPECTIVE; } +NULL {return KW_NULL; } out {return KW_OUT; } packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } @@ -142,6 +145,7 @@ stateblock {return KW_STATEBLOCK; } stateblock_state {return KW_STATEBLOCK_STATE; } static {return KW_STATIC; } string {return KW_STRING; } +String {return KW_STRING; } struct {return KW_STRUCT; } switch {return KW_SWITCH; } tbuffer {return KW_TBUFFER; } @@ -164,6 +168,7 @@ textureCUBE {return KW_TEXTURECUBE; } TextureCubeArray {return KW_TEXTURECUBEARRAY; } true {return KW_TRUE; } typedef {return KW_TYPEDEF; } +unsigned {return KW_UNSIGNED; } uniform {return KW_UNIFORM; } vector {return KW_VECTOR; } VertexShader {return KW_VERTEXSHADER; } @@ -197,7 +202,9 @@ while {return KW_WHILE; } struct hlsl_ctx *ctx = yyget_extra(yyscanner); yylval->name = hlsl_strdup(ctx, yytext); - if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) + if (hlsl_version_ge(ctx, 5, 1) && !strcmp(yytext, "ConstantBuffer")) + return KW_CONSTANTBUFFER; + else if (hlsl_get_var(ctx->cur_scope, yytext) || hlsl_get_function(ctx, yytext)) return VAR_IDENTIFIER; else if (hlsl_get_type(ctx->cur_scope, yytext, true, true)) return TYPE_IDENTIFIER; @@ -205,6 +212,16 @@ while {return KW_WHILE; } return NEW_IDENTIFIER; } +{STRING} { + struct hlsl_ctx *ctx = yyget_extra(yyscanner); + char *string = hlsl_strdup(ctx, yytext + 1); + + string[strlen(string) - 1] = 0; + apply_escape_sequences(string); + yylval->name = string; + return STRING; + } + [0-9]*\.[0-9]+([eE][+-]?[0-9]+)?[h|H|f|F]? 
{ yylval->floatval = atof(yytext); return C_FLOAT; @@ -289,6 +306,7 @@ while {return KW_WHILE; } BEGIN(pp_ignore); string[strlen(string) - 1] = 0; + apply_escape_sequences(string); yylval->name = string; return STRING; } @@ -338,3 +356,115 @@ int hlsl_lexer_compile(struct hlsl_ctx *ctx, const struct vkd3d_shader_code *hls yylex_destroy(ctx->scanner); return ret; } + +static void apply_escape_sequences(char *str) +{ + unsigned int i = 0, k = 0, r; + + while (str[i]) + { + unsigned char v = 0; + + if (str[i] != '\\') + { + str[k++] = str[i]; + ++i; + continue; + } + + ++i; + VKD3D_ASSERT(str[i]); + + if ('0' <= str[i] && str[i] <= '7') + { + /* octal, up to 3 digits. */ + for (r = 0; r < 3; ++r) + { + char c = str[i]; + + if ('0' <= c && c <= '7') + { + v = v << 3; + v += c - '0'; + ++i; + } + else + break; + } + str[k++] = v; + continue; + } + + if (str[i] == 'x') + { + bool number = false; + + /* hexadecimal */ + ++i; + while (1) + { + char c = str[i]; + + if ('0' <= c && c <= '9') + { + v = v << 4; + v += c - '0'; + number = true; + ++i; + } + else if ('a' <= c && c <= 'f') + { + v = v << 4; + v += c - 'a' + 10; + number = true; + ++i; + } + else if ('A' <= c && c <= 'F') + { + v = v << 4; + v += c - 'A' + 10; + number = true; + ++i; + } + else + break; + } + if (number) + str[k++] = v; + else + str[k++] = 'x'; + continue; + } + + switch (str[i]) + { + case 'a': + str[k++] = '\a'; + break; + case 'b': + str[k++] = '\b'; + break; + case 'f': + str[k++] = '\f'; + break; + case 'n': + str[k++] = '\n'; + break; + case 'r': + str[k++] = '\r'; + break; + case 't': + str[k++] = '\t'; + break; + case 'v': + str[k++] = '\v'; + break; + + default: + str[k++] = str[i]; + break; + } + ++i; + } + str[k++] = '\0'; +} diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index 9c1bdef926d..3f319dea0d8 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -34,6 +34,14 @@ struct parse_fields size_t count, 
capacity; }; +struct parse_initializer +{ + struct hlsl_ir_node **args; + unsigned int args_count; + struct hlsl_block *instrs; + bool braces; +}; + struct parse_parameter { struct hlsl_type *type; @@ -41,6 +49,7 @@ struct parse_parameter struct hlsl_semantic semantic; struct hlsl_reg_reservation reg_reservation; uint32_t modifiers; + struct parse_initializer initializer; }; struct parse_colon_attribute @@ -49,14 +58,6 @@ struct parse_colon_attribute struct hlsl_reg_reservation reg_reservation; }; -struct parse_initializer -{ - struct hlsl_ir_node **args; - unsigned int args_count; - struct hlsl_block *instrs; - bool braces; -}; - struct parse_array_sizes { uint32_t *sizes; /* innermost first */ @@ -73,6 +74,7 @@ struct parse_variable_def struct hlsl_semantic semantic; struct hlsl_reg_reservation reg_reservation; struct parse_initializer initializer; + struct hlsl_scope *annotations; struct hlsl_type *basic_type; uint32_t modifiers; @@ -302,6 +304,26 @@ static bool implicit_compatible_data_types(struct hlsl_ctx *ctx, struct hlsl_typ } } + if (src->class == HLSL_CLASS_NULL) + { + switch (dst->class) + { + case HLSL_CLASS_DEPTH_STENCIL_STATE: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: + case HLSL_CLASS_TEXTURE: + case HLSL_CLASS_UAV: + case HLSL_CLASS_VERTEX_SHADER: + return true; + default: + break; + } + } + return hlsl_types_are_componentwise_equal(ctx, src, dst); } @@ -329,6 +351,9 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct hlsl_block *bl if (hlsl_types_are_equal(src_type, dst_type)) return node; + if (src_type->class == HLSL_CLASS_NULL) + return node; + if (src_type->class > HLSL_CLASS_VECTOR || dst_type->class > HLSL_CLASS_VECTOR) { unsigned int src_comp_count = hlsl_type_component_count(src_type); @@ -342,11 +367,11 @@ static struct hlsl_ir_node *add_cast(struct hlsl_ctx *ctx, struct 
hlsl_block *bl broadcast = hlsl_is_numeric_type(src_type) && src_type->dimx == 1 && src_type->dimy == 1; matrix_cast = !broadcast && dst_comp_count != src_comp_count && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX; - assert(src_comp_count >= dst_comp_count || broadcast); + VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast); if (matrix_cast) { - assert(dst_type->dimx <= src_type->dimx); - assert(dst_type->dimy <= src_type->dimy); + VKD3D_ASSERT(dst_type->dimx <= src_type->dimx); + VKD3D_ASSERT(dst_type->dimy <= src_type->dimy); } if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, loc))) @@ -573,12 +598,104 @@ static void check_loop_attributes(struct hlsl_ctx *ctx, const struct parse_attri hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Unroll attribute can't be used with 'fastopt' attribute."); } +static struct hlsl_default_value evaluate_static_expression(struct hlsl_ctx *ctx, + struct hlsl_block *block, struct hlsl_type *dst_type, const struct vkd3d_shader_location *loc) +{ + struct hlsl_default_value ret = {0}; + struct hlsl_ir_node *node; + struct hlsl_block expr; + struct hlsl_src src; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + switch (node->type) + { + case HLSL_IR_CONSTANT: + case HLSL_IR_EXPR: + case HLSL_IR_STRING_CONSTANT: + case HLSL_IR_SWIZZLE: + case HLSL_IR_LOAD: + case HLSL_IR_INDEX: + continue; + case HLSL_IR_STORE: + if (hlsl_ir_store(node)->lhs.var->is_synthetic) + break; + /* fall-through */ + case HLSL_IR_CALL: + case HLSL_IR_IF: + case HLSL_IR_LOOP: + case HLSL_IR_JUMP: + case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_RESOURCE_STORE: + case HLSL_IR_SWITCH: + case HLSL_IR_STATEBLOCK_CONSTANT: + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Expected literal expression."); + break; + } + } + + if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) + return ret; + hlsl_block_add_block(&expr, block); + + if 
(!add_implicit_conversion(ctx, &expr, node_from_block(&expr), dst_type, loc)) + { + hlsl_block_cleanup(&expr); + return ret; + } + + /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ + hlsl_src_from_node(&src, node_from_block(&expr)); + hlsl_run_const_passes(ctx, &expr); + node = src.node; + hlsl_src_remove(&src); + + if (node->type == HLSL_IR_CONSTANT) + { + struct hlsl_ir_constant *constant = hlsl_ir_constant(node); + + ret.number = constant->value.u[0]; + } + else if (node->type == HLSL_IR_STRING_CONSTANT) + { + struct hlsl_ir_string_constant *string = hlsl_ir_string_constant(node); + + if (!(ret.string = vkd3d_strdup(string->string))) + return ret; + } + else if (node->type == HLSL_IR_STRING_CONSTANT) + { + hlsl_fixme(ctx, &node->loc, "Evaluate string constants as static expressions."); + } + else + { + hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Failed to evaluate constant expression."); + } + + hlsl_block_cleanup(&expr); + + return ret; +} + +static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, + const struct vkd3d_shader_location *loc) +{ + struct hlsl_default_value res; + + res = evaluate_static_expression(ctx, block, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc); + VKD3D_ASSERT(!res.string); + return res.number.u; +} + static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, const struct parse_attribute_list *attributes, struct hlsl_block *init, struct hlsl_block *cond, struct hlsl_block *iter, struct hlsl_block *body, const struct vkd3d_shader_location *loc) { + enum hlsl_ir_loop_unroll_type unroll_type = HLSL_IR_LOOP_UNROLL; + unsigned int i, unroll_limit = 0; struct hlsl_ir_node *loop; - unsigned int i; if (attribute_list_has_duplicates(attributes)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, "Found duplicate attribute."); @@ -591,18 +708,29 @@ static struct hlsl_block *create_loop(struct 
hlsl_ctx *ctx, enum loop_type type, const struct hlsl_attribute *attr = attributes->attrs[i]; if (!strcmp(attr->name, "unroll")) { - if (attr->args_count) + if (attr->args_count > 1) { - hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Unroll attribute with iteration count."); + hlsl_warning(ctx, &attr->loc, VKD3D_SHADER_WARNING_HLSL_IGNORED_ATTRIBUTE, + "Ignoring 'unroll' attribute with more than 1 argument."); + continue; } - else + + if (attr->args_count == 1) { - hlsl_warning(ctx, loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED, "Loop unrolling is not implemented."); + struct hlsl_block expr; + hlsl_block_init(&expr); + if (!hlsl_clone_block(ctx, &expr, &attr->instrs)) + return NULL; + + unroll_limit = evaluate_static_expression_as_uint(ctx, &expr, loc); + hlsl_block_cleanup(&expr); } + + unroll_type = HLSL_IR_LOOP_FORCE_UNROLL; } else if (!strcmp(attr->name, "loop")) { - /* TODO: this attribute will be used to disable unrolling, once it's implememented. */ + unroll_type = HLSL_IR_LOOP_FORCE_LOOP; } else if (!strcmp(attr->name, "fastopt") || !strcmp(attr->name, "allow_uav_condition")) @@ -631,7 +759,7 @@ static struct hlsl_block *create_loop(struct hlsl_ctx *ctx, enum loop_type type, else list_move_head(&body->instrs, &cond->instrs); - if (!(loop = hlsl_new_loop(ctx, body, loc))) + if (!(loop = hlsl_new_loop(ctx, body, unroll_type, unroll_limit, loc))) goto oom; hlsl_block_add_instr(init, loop); @@ -663,7 +791,7 @@ static void cleanup_parse_attribute_list(struct parse_attribute_list *attr_list) { unsigned int i = 0; - assert(attr_list); + VKD3D_ASSERT(attr_list); for (i = 0; i < attr_list->count; ++i) hlsl_free_attribute((struct hlsl_attribute *) attr_list->attrs[i]); vkd3d_free(attr_list->attrs); @@ -823,7 +951,7 @@ static bool add_record_access(struct hlsl_ctx *ctx, struct hlsl_block *block, st { struct hlsl_ir_node *index, *c; - assert(idx < record->data_type->e.record.field_count); + VKD3D_ASSERT(idx < record->data_type->e.record.field_count); if 
(!(c = hlsl_new_uint_constant(ctx, idx, loc))) return false; @@ -953,7 +1081,7 @@ static void free_parse_variable_def(struct parse_variable_def *v) vkd3d_free(v->arrays.sizes); vkd3d_free(v->name); hlsl_cleanup_semantic(&v->semantic); - assert(!v->state_blocks); + VKD3D_ASSERT(!v->state_blocks); vkd3d_free(v); } @@ -964,7 +1092,7 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, size_t i = 0; if (type->class == HLSL_CLASS_MATRIX) - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); memset(fields, 0, sizeof(*fields)); fields->count = list_count(defs); @@ -1013,6 +1141,10 @@ static bool gen_struct_fields(struct hlsl_ctx *ctx, struct parse_fields *fields, field->type = hlsl_new_array_type(ctx, field->type, v->arrays.sizes[k]); } } + + if (hlsl_version_ge(ctx, 5, 1) && field->type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(field->type)) + hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); + vkd3d_free(v->arrays.sizes); field->loc = v->loc; field->name = v->name; @@ -1094,13 +1226,16 @@ static bool add_typedef(struct hlsl_ctx *ctx, struct hlsl_type *const orig_type, return true; } +static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, + struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src); + static bool add_func_parameter(struct hlsl_ctx *ctx, struct hlsl_func_parameters *parameters, struct parse_parameter *param, const struct vkd3d_shader_location *loc) { struct hlsl_ir_var *var; if (param->type->class == HLSL_CLASS_MATRIX) - assert(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + VKD3D_ASSERT(param->type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if ((param->modifiers & HLSL_STORAGE_OUT) && (param->modifiers & HLSL_STORAGE_UNIFORM)) hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, @@ -1110,11 +1245,52 @@ static bool add_func_parameter(struct hlsl_ctx *ctx, struct 
hlsl_func_parameters hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "packoffset() is not allowed on function parameters."); + if (parameters->count && parameters->vars[parameters->count - 1]->default_values + && !param->initializer.args_count) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, + "Missing default value for parameter '%s'.", param->name); + + if (param->initializer.args_count && (param->modifiers & HLSL_STORAGE_OUT)) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER, + "Output parameter '%s' has a default value.", param->name); + if (!(var = hlsl_new_var(ctx, param->name, param->type, loc, ¶m->semantic, param->modifiers, ¶m->reg_reservation))) return false; var->is_param = 1; + if (param->initializer.args_count) + { + unsigned int component_count = hlsl_type_component_count(param->type); + unsigned int store_index = 0; + unsigned int size, i; + + if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) + return false; + + if (!param->initializer.braces) + { + if (!(add_implicit_conversion(ctx, param->initializer.instrs, param->initializer.args[0], param->type, loc))) + return false; + + param->initializer.args[0] = node_from_block(param->initializer.instrs); + } + + size = initializer_size(¶m->initializer); + if (component_count != size) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", component_count, size); + } + + for (i = 0; i < param->initializer.args_count; ++i) + { + initialize_var_components(ctx, param->initializer.instrs, var, &store_index, param->initializer.args[i]); + } + + free_parse_initializer(¶m->initializer); + } + if (!hlsl_add_var(ctx, var, false)) { hlsl_free_var(var); @@ -1210,12 +1386,42 @@ static bool add_effect_group(struct hlsl_ctx *ctx, const char *name, struct hlsl return true; } -static bool parse_reservation_index(const char *string, char *type, uint32_t 
*index) +static bool parse_reservation_index(struct hlsl_ctx *ctx, const char *string, unsigned int bracket_offset, + struct hlsl_reg_reservation *reservation) { - if (!sscanf(string + 1, "%u", index)) - return false; + char *endptr; + + reservation->reg_type = ascii_tolower(string[0]); + + /* Prior to SM5.1, fxc simply ignored bracket offsets for 'b' types. */ + if (reservation->reg_type == 'b' && hlsl_version_lt(ctx, 5, 1)) + { + bracket_offset = 0; + } + + if (string[1] == '\0') + { + reservation->reg_index = bracket_offset; + return true; + } + + reservation->reg_index = strtoul(string + 1, &endptr, 10) + bracket_offset; + + if (*endptr) + { + /* fxc for SM >= 4 treats all parse failures for 'b' types as successes, + * setting index to -1. It will later fail while validating slot limits. */ + if (reservation->reg_type == 'b' && hlsl_version_ge(ctx, 4, 0)) + { + reservation->reg_index = -1; + return true; + } + + /* All other types tolerate leftover characters. */ + if (endptr == string + 1) + return false; + } - *type = ascii_tolower(string[0]); return true; } @@ -1286,72 +1492,6 @@ static struct hlsl_block *make_block(struct hlsl_ctx *ctx, struct hlsl_ir_node * return block; } -static unsigned int evaluate_static_expression_as_uint(struct hlsl_ctx *ctx, struct hlsl_block *block, - const struct vkd3d_shader_location *loc) -{ - struct hlsl_ir_constant *constant; - struct hlsl_ir_node *node; - struct hlsl_block expr; - unsigned int ret = 0; - struct hlsl_src src; - - LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) - { - switch (node->type) - { - case HLSL_IR_CONSTANT: - case HLSL_IR_EXPR: - case HLSL_IR_SWIZZLE: - case HLSL_IR_LOAD: - case HLSL_IR_INDEX: - continue; - case HLSL_IR_CALL: - case HLSL_IR_IF: - case HLSL_IR_LOOP: - case HLSL_IR_JUMP: - case HLSL_IR_RESOURCE_LOAD: - case HLSL_IR_RESOURCE_STORE: - case HLSL_IR_STORE: - case HLSL_IR_SWITCH: - case HLSL_IR_STATEBLOCK_CONSTANT: - hlsl_error(ctx, &node->loc, 
VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Expected literal expression."); - } - } - - if (!hlsl_clone_block(ctx, &expr, &ctx->static_initializers)) - return 0; - hlsl_block_add_block(&expr, block); - - if (!add_implicit_conversion(ctx, &expr, node_from_block(&expr), - hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)) - { - hlsl_block_cleanup(&expr); - return 0; - } - - /* Wrap the node into a src to allow the reference to survive the multiple const passes. */ - hlsl_src_from_node(&src, node_from_block(&expr)); - hlsl_run_const_passes(ctx, &expr); - node = src.node; - hlsl_src_remove(&src); - - if (node->type == HLSL_IR_CONSTANT) - { - constant = hlsl_ir_constant(node); - ret = constant->value.u[0].u; - } - else - { - hlsl_error(ctx, &node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "Failed to evaluate constant expression."); - } - - hlsl_block_cleanup(&expr); - - return ret; -} - static bool expr_compatible_data_types(struct hlsl_type *t1, struct hlsl_type *t2) { /* Scalar vars can be converted to pretty much everything */ @@ -1759,49 +1899,51 @@ static struct hlsl_ir_node *add_binary_dot_expr(struct hlsl_ctx *ctx, struct hls return add_expr(ctx, instrs, op, args, ret_type, loc); } -static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, - struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +static struct hlsl_ir_node *add_binary_expr(struct hlsl_ctx *ctx, struct hlsl_block *block, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *lhs, struct hlsl_ir_node *rhs, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); - - hlsl_block_add_block(block1, block2); - destroy_block(block2); - switch (op) { case HLSL_OP2_ADD: case HLSL_OP2_DIV: case HLSL_OP2_MOD: case HLSL_OP2_MUL: - add_binary_arithmetic_expr(ctx, block1, op, arg1, arg2, loc); - break; + return add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, 
loc); case HLSL_OP2_BIT_AND: case HLSL_OP2_BIT_OR: case HLSL_OP2_BIT_XOR: - add_binary_bitwise_expr(ctx, block1, op, arg1, arg2, loc); - break; + return add_binary_bitwise_expr(ctx, block, op, lhs, rhs, loc); case HLSL_OP2_LESS: case HLSL_OP2_GEQUAL: case HLSL_OP2_EQUAL: case HLSL_OP2_NEQUAL: - add_binary_comparison_expr(ctx, block1, op, arg1, arg2, loc); - break; + return add_binary_comparison_expr(ctx, block, op, lhs, rhs, loc); case HLSL_OP2_LOGIC_AND: case HLSL_OP2_LOGIC_OR: - add_binary_logical_expr(ctx, block1, op, arg1, arg2, loc); - break; + return add_binary_logical_expr(ctx, block, op, lhs, rhs, loc); case HLSL_OP2_LSHIFT: case HLSL_OP2_RSHIFT: - add_binary_shift_expr(ctx, block1, op, arg1, arg2, loc); - break; + return add_binary_shift_expr(ctx, block, op, lhs, rhs, loc); default: vkd3d_unreachable(); } +} + +static struct hlsl_block *add_binary_expr_merge(struct hlsl_ctx *ctx, struct hlsl_block *block1, + struct hlsl_block *block2, enum hlsl_ir_expr_op op, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg1 = node_from_block(block1), *arg2 = node_from_block(block2); + + hlsl_block_add_block(block1, block2); + destroy_block(block2); + + if (add_binary_expr(ctx, block1, op, arg1, arg2, loc) == NULL) + return NULL; return block1; } @@ -1862,12 +2004,57 @@ static bool invert_swizzle(uint32_t *swizzle, unsigned int *writemask, unsigned return true; } +static bool invert_swizzle_matrix(uint32_t *swizzle, unsigned int *writemask, unsigned int *ret_width) +{ + /* swizzle is 8 bits per component, each component is (from LSB) 4 bits X, then 4 bits Y. + * components are indexed by their sources. i.e. the first component comes from the first + * component of the rhs. */ + unsigned int i, j, bit = 0, inverted = 0, width, new_writemask = 0, new_swizzle = 0; + + /* First, we filter the swizzle to remove components that aren't enabled by writemask. 
*/ + for (i = 0; i < 4; ++i) + { + if (*writemask & (1 << i)) + { + unsigned int s = (*swizzle >> (i * 8)) & 0xff; + unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int idx = x + y * 4; + new_swizzle |= s << (bit++ * 8); + if (new_writemask & (1 << idx)) + return false; + new_writemask |= 1 << idx; + } + } + width = bit; + + /* Then we invert the swizzle. The resulting swizzle has 2 bits per component, because it's for the + * incoming vector. */ + bit = 0; + for (i = 0; i < 16; ++i) + { + for (j = 0; j < width; ++j) + { + unsigned int s = (new_swizzle >> (j * 8)) & 0xff; + unsigned int x = s & 0xf, y = (s >> 4) & 0xf; + unsigned int idx = x + y * 4; + if (idx == i) + inverted |= j << (bit++ * 2); + } + } + + *swizzle = inverted; + *writemask = new_writemask; + *ret_width = width; + return true; +} + static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *lhs, enum parse_assign_op assign_op, struct hlsl_ir_node *rhs) { struct hlsl_type *lhs_type = lhs->data_type; struct hlsl_ir_node *copy; - unsigned int writemask = 0; + unsigned int writemask = 0, width = 0; + bool matrix_writemask = false; if (assign_op == ASSIGN_OP_SUB) { @@ -1879,13 +2066,16 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo { enum hlsl_ir_expr_op op = op_from_assignment(assign_op); - assert(op); - if (!(rhs = add_binary_arithmetic_expr(ctx, block, op, lhs, rhs, &rhs->loc))) + VKD3D_ASSERT(op); + if (!(rhs = add_binary_expr(ctx, block, op, lhs, rhs, &rhs->loc))) return NULL; } if (hlsl_is_numeric_type(lhs_type)) + { writemask = (1 << lhs_type->dimx) - 1; + width = lhs_type->dimx; + } if (!(rhs = add_implicit_conversion(ctx, block, rhs, lhs_type, &rhs->loc))) return NULL; @@ -1902,12 +2092,24 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(lhs); struct hlsl_ir_node *new_swizzle; uint32_t s = swizzle->swizzle; - 
unsigned int width; - if (lhs->data_type->class == HLSL_CLASS_MATRIX) - hlsl_fixme(ctx, &lhs->loc, "Matrix assignment with a writemask."); + VKD3D_ASSERT(!matrix_writemask); - if (!invert_swizzle(&s, &writemask, &width)) + if (swizzle->val.node->data_type->class == HLSL_CLASS_MATRIX) + { + if (swizzle->val.node->type != HLSL_IR_LOAD && swizzle->val.node->type != HLSL_IR_INDEX) + { + hlsl_fixme(ctx, &lhs->loc, "Unhandled source of matrix swizzle."); + return NULL; + } + if (!invert_swizzle_matrix(&s, &writemask, &width)) + { + hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask for matrix."); + return NULL; + } + matrix_writemask = true; + } + else if (!invert_swizzle(&s, &writemask, &width)) { hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Invalid writemask."); return NULL; @@ -1947,7 +2149,7 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo return NULL; resource_type = hlsl_deref_get_type(ctx, &resource_deref); - assert(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); + VKD3D_ASSERT(resource_type->class == HLSL_CLASS_TEXTURE || resource_type->class == HLSL_CLASS_UAV); if (resource_type->class != HLSL_CLASS_UAV) hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, @@ -1955,13 +2157,13 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim); - if (writemask != ((1u << resource_type->e.resource.format->dimx) - 1)) + if (width != resource_type->e.resource.format->dimx * resource_type->e.resource.format->dimy) hlsl_error(ctx, &lhs->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_WRITEMASK, "Resource store expressions must write to all components."); - assert(coords->data_type->class == HLSL_CLASS_VECTOR); - assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(coords->data_type->dimx == dim_count); + 
VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); + VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); + VKD3D_ASSERT(coords->data_type->dimx == dim_count); if (!(store = hlsl_new_resource_store(ctx, &resource_deref, coords, rhs, &lhs->loc))) { @@ -1971,12 +2173,50 @@ static struct hlsl_ir_node *add_assignment(struct hlsl_ctx *ctx, struct hlsl_blo hlsl_block_add_instr(block, store); hlsl_cleanup_deref(&resource_deref); } + else if (matrix_writemask) + { + struct hlsl_deref deref; + unsigned int i, j, k = 0; + + hlsl_init_deref_from_index_chain(ctx, &deref, lhs); + + for (i = 0; i < lhs->data_type->dimy; ++i) + { + for (j = 0; j < lhs->data_type->dimx; ++j) + { + struct hlsl_ir_node *load; + struct hlsl_block store_block; + const unsigned int idx = i * 4 + j; + const unsigned int component = i * lhs->data_type->dimx + j; + + if (!(writemask & (1 << idx))) + continue; + + if (!(load = hlsl_add_load_component(ctx, block, rhs, k++, &rhs->loc))) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + + if (!hlsl_new_store_component(ctx, &store_block, &deref, component, load)) + { + hlsl_cleanup_deref(&deref); + return NULL; + } + hlsl_block_add_block(block, &store_block); + } + } + + hlsl_cleanup_deref(&deref); + } else if (lhs->type == HLSL_IR_INDEX && hlsl_index_is_noncontiguous(hlsl_ir_index(lhs))) { struct hlsl_ir_index *row = hlsl_ir_index(lhs); struct hlsl_ir_node *mat = row->val.node; unsigned int i, k = 0; + VKD3D_ASSERT(!matrix_writemask); + for (i = 0; i < mat->data_type->dimx; ++i) { struct hlsl_ir_node *cell, *load, *store, *c; @@ -2067,6 +2307,55 @@ static bool add_increment(struct hlsl_ctx *ctx, struct hlsl_block *block, bool d return true; } +/* For some reason, for matrices, values from default value initializers end up in different + * components than from regular initializers. 
Default value initializers fill the matrix in + * vertical reading order (left-to-right top-to-bottom) instead of regular reading order + * (top-to-bottom left-to-right), so they have to be adjusted. + * An exception is that the order of matrix initializers for function parameters are row-major + * (top-to-bottom left-to-right). */ +static unsigned int get_component_index_from_default_initializer_index(struct hlsl_ctx *ctx, + struct hlsl_type *type, unsigned int index) +{ + unsigned int element_comp_count, element, x, y, i; + unsigned int base = 0; + + if (ctx->profile->major_version < 4) + return index; + + if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) + return index; + + switch (type->class) + { + case HLSL_CLASS_MATRIX: + x = index / type->dimy; + y = index % type->dimy; + return y * type->dimx + x; + + case HLSL_CLASS_ARRAY: + element_comp_count = hlsl_type_component_count(type->e.array.type); + element = index / element_comp_count; + base = element * element_comp_count; + return base + get_component_index_from_default_initializer_index(ctx, type->e.array.type, index - base); + + case HLSL_CLASS_STRUCT: + for (i = 0; i < type->e.record.field_count; ++i) + { + struct hlsl_type *field_type = type->e.record.fields[i].type; + + element_comp_count = hlsl_type_component_count(field_type); + if (index - base < element_comp_count) + return base + get_component_index_from_default_initializer_index(ctx, field_type, index - base); + base += element_comp_count; + } + break; + + default: + return index; + } + vkd3d_unreachable(); +} + static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *instrs, struct hlsl_ir_var *dst, unsigned int *store_index, struct hlsl_ir_node *src) { @@ -2087,12 +2376,33 @@ static void initialize_var_components(struct hlsl_ctx *ctx, struct hlsl_block *i dst_comp_type = hlsl_type_get_component_type(ctx, dst->data_type, *store_index); - if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) - 
return; + if (dst->default_values) + { + struct hlsl_default_value default_value = {0}; + unsigned int dst_index; - if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) - return; - hlsl_block_add_block(instrs, &block); + if (!hlsl_clone_block(ctx, &block, instrs)) + return; + default_value = evaluate_static_expression(ctx, &block, dst_comp_type, &src->loc); + + if (dst->is_param) + dst_index = *store_index; + else + dst_index = get_component_index_from_default_initializer_index(ctx, dst->data_type, *store_index); + + dst->default_values[dst_index] = default_value; + + hlsl_block_cleanup(&block); + } + else + { + if (!(conv = add_implicit_conversion(ctx, instrs, load, dst_comp_type, &src->loc))) + return; + + if (!hlsl_new_store_component(ctx, &block, &dst_deref, *store_index, conv)) + return; + hlsl_block_add_block(instrs, &block); + } ++*store_index; } @@ -2171,16 +2481,17 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) struct hlsl_semantic new_semantic; uint32_t modifiers = v->modifiers; bool unbounded_res_array = false; + bool constant_buffer = false; struct hlsl_ir_var *var; struct hlsl_type *type; bool local = true; char *var_name; unsigned int i; - assert(basic_type); + VKD3D_ASSERT(basic_type); if (basic_type->class == HLSL_CLASS_MATRIX) - assert(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + VKD3D_ASSERT(basic_type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); type = basic_type; @@ -2190,6 +2501,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) unbounded_res_array |= (v->arrays.sizes[i] == HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT); } + if (type->class == HLSL_CLASS_CONSTANT_BUFFER) + { + type = type->e.resource.format; + constant_buffer = true; + } + if (unbounded_res_array) { if (v->arrays.count == 1) @@ -2246,17 +2563,22 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) } } + if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY 
&& hlsl_type_is_resource(type)) + { + /* SM 5.1/6.x descriptor arrays act differently from previous versions. + * Not only are they treated as a single object in reflection, but they + * act as a single component for the purposes of assignment and + * initialization. */ + hlsl_fixme(ctx, &v->loc, "Shader model 5.1+ resource array."); + } + if (!(var_name = vkd3d_strdup(v->name))) return; - new_semantic = v->semantic; - if (v->semantic.name) + if (!hlsl_clone_semantic(ctx, &new_semantic, &v->semantic)) { - if (!(new_semantic.name = vkd3d_strdup(v->semantic.name))) - { - vkd3d_free(var_name); - return; - } + vkd3d_free(var_name); + return; } if (!(var = hlsl_new_var(ctx, var_name, type, &v->loc, &new_semantic, modifiers, &v->reg_reservation))) @@ -2266,7 +2588,18 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) return; } - var->buffer = ctx->cur_buffer; + var->annotations = v->annotations; + + if (constant_buffer && ctx->cur_scope == ctx->globals) + { + if (!(var_name = vkd3d_strdup(v->name))) + return; + var->buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, var_name, modifiers, &v->reg_reservation, NULL, &v->loc); + } + else + { + var->buffer = ctx->cur_buffer; + } if (var->buffer == ctx->globals_buffer) { @@ -2289,8 +2622,11 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) if (!(modifiers & HLSL_STORAGE_STATIC)) var->storage_modifiers |= HLSL_STORAGE_UNIFORM; - if (ctx->profile->major_version < 5 && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + if ((ctx->profile->major_version < 5 || ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT) + && (var->storage_modifiers & HLSL_STORAGE_UNIFORM)) + { check_invalid_object_fields(ctx, var); + } if ((func = hlsl_get_first_func_decl(ctx, var->name))) { @@ -2323,6 +2659,12 @@ static void declare_var(struct hlsl_ctx *ctx, struct parse_variable_def *v) hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INITIALIZER, "Const variable \"%s\" is missing an 
initializer.", var->name); } + + if (var->annotations) + { + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, + "Annotations are only allowed for objects in the global scope."); + } } if ((var->storage_modifiers & HLSL_STORAGE_STATIC) && type_has_numeric_components(var->data_type) @@ -2348,6 +2690,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var { struct parse_variable_def *v, *v_next; struct hlsl_block *initializers; + unsigned int component_count; struct hlsl_ir_var *var; struct hlsl_type *type; @@ -2371,6 +2714,7 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var } type = var->data_type; + component_count = hlsl_type_component_count(type); var->state_blocks = v->state_blocks; var->state_block_count = v->state_block_count; @@ -2379,51 +2723,78 @@ static struct hlsl_block *initialize_vars(struct hlsl_ctx *ctx, struct list *var v->state_block_capacity = 0; v->state_blocks = NULL; - if (var->state_blocks && hlsl_type_component_count(type) != var->state_block_count) + if (var->state_blocks && component_count != var->state_block_count) { hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u state blocks, but got %u.", - hlsl_type_component_count(type), var->state_block_count); + "Expected %u state blocks, but got %u.", component_count, var->state_block_count); free_parse_variable_def(v); continue; } if (v->initializer.args_count) { - if (v->initializer.braces) + unsigned int store_index = 0; + bool is_default_values_initializer; + unsigned int size, k; + + is_default_values_initializer = (ctx->cur_buffer != ctx->globals_buffer) + || (var->storage_modifiers & HLSL_STORAGE_UNIFORM) + || ctx->cur_scope->annotations; + + if (is_default_values_initializer) { - unsigned int size = initializer_size(&v->initializer); - unsigned int store_index = 0; - unsigned int k; + /* Default values might have been allocated already for another variable of the same name, + 
in the same scope. */ + if (var->default_values) + { + free_parse_variable_def(v); + continue; + } - if (hlsl_type_component_count(type) != size) + if (!(var->default_values = hlsl_calloc(ctx, component_count, sizeof(*var->default_values)))) { - hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, - "Expected %u components in initializer, but got %u.", - hlsl_type_component_count(type), size); free_parse_variable_def(v); continue; } + } - for (k = 0; k < v->initializer.args_count; ++k) + if (!v->initializer.braces) + { + if (!(add_implicit_conversion(ctx, v->initializer.instrs, v->initializer.args[0], type, &v->loc))) { - initialize_var_components(ctx, v->initializer.instrs, var, - &store_index, v->initializer.args[k]); + free_parse_variable_def(v); + continue; } + + v->initializer.args[0] = node_from_block(v->initializer.instrs); } - else + + size = initializer_size(&v->initializer); + if (component_count != size) { - struct hlsl_ir_load *load = hlsl_new_var_load(ctx, var, &var->loc); + hlsl_error(ctx, &v->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT, + "Expected %u components in initializer, but got %u.", component_count, size); + free_parse_variable_def(v); + continue; + } - assert(v->initializer.args_count == 1); - hlsl_block_add_instr(v->initializer.instrs, &load->node); - add_assignment(ctx, v->initializer.instrs, &load->node, ASSIGN_OP_ASSIGN, v->initializer.args[0]); + for (k = 0; k < v->initializer.args_count; ++k) + { + initialize_var_components(ctx, v->initializer.instrs, var, &store_index, v->initializer.args[k]); } - if (var->storage_modifiers & HLSL_STORAGE_STATIC) + if (is_default_values_initializer) + { + hlsl_dump_var_default_values(var); + } + else if (var->storage_modifiers & HLSL_STORAGE_STATIC) + { hlsl_block_add_block(&ctx->static_initializers, v->initializer.instrs); + } else + { hlsl_block_add_block(initializers, v->initializer.instrs); + } } else if (var->storage_modifiers & HLSL_STORAGE_STATIC) { @@ -2469,14 
+2840,18 @@ static bool func_is_compatible_match(struct hlsl_ctx *ctx, { unsigned int i; - if (decl->parameters.count != args->args_count) + if (decl->parameters.count < args->args_count) return false; - for (i = 0; i < decl->parameters.count; ++i) + for (i = 0; i < args->args_count; ++i) { if (!implicit_compatible_data_types(ctx, args->args[i]->data_type, decl->parameters.vars[i]->data_type)) return false; } + + if (args->args_count < decl->parameters.count && !decl->parameters.vars[args->args_count]->default_values) + return false; + return true; } @@ -2519,11 +2894,11 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu const struct parse_initializer *args, const struct vkd3d_shader_location *loc) { struct hlsl_ir_node *call; - unsigned int i; + unsigned int i, j; - assert(args->args_count == func->parameters.count); + VKD3D_ASSERT(args->args_count <= func->parameters.count); - for (i = 0; i < func->parameters.count; ++i) + for (i = 0; i < args->args_count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; struct hlsl_ir_node *arg = args->args[i]; @@ -2548,11 +2923,43 @@ static bool add_user_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fu } } + /* Add default values for the remaining parameters. 
*/ + for (i = args->args_count; i < func->parameters.count; ++i) + { + struct hlsl_ir_var *param = func->parameters.vars[i]; + unsigned int comp_count = hlsl_type_component_count(param->data_type); + struct hlsl_deref param_deref; + + VKD3D_ASSERT(param->default_values); + + hlsl_init_simple_deref_from_var(¶m_deref, param); + + for (j = 0; j < comp_count; ++j) + { + struct hlsl_type *type = hlsl_type_get_component_type(ctx, param->data_type, j); + struct hlsl_constant_value value; + struct hlsl_ir_node *comp; + struct hlsl_block store_block; + + if (!param->default_values[j].string) + { + value.u[0] = param->default_values[j].number; + if (!(comp = hlsl_new_constant(ctx, type, &value, loc))) + return false; + hlsl_block_add_instr(args->instrs, comp); + + if (!hlsl_new_store_component(ctx, &store_block, ¶m_deref, j, comp)) + return false; + hlsl_block_add_block(args->instrs, &store_block); + } + } + } + if (!(call = hlsl_new_call(ctx, func, loc))) return false; hlsl_block_add_instr(args->instrs, call); - for (i = 0; i < func->parameters.count; ++i) + for (i = 0; i < args->args_count; ++i) { struct hlsl_ir_var *param = func->parameters.vars[i]; struct hlsl_ir_node *arg = args->args[i]; @@ -2699,6 +3106,19 @@ static bool elementwise_intrinsic_float_convert_args(struct hlsl_ctx *ctx, return convert_args(ctx, params, type, loc); } +static bool elementwise_intrinsic_uint_convert_args(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_type *type; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) + return false; + + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->dimx, type->dimy); + + return convert_args(ctx, params, type, loc); +} + static bool intrinsic_abs(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -2933,6 +3353,29 @@ static bool intrinsic_asfloat(struct hlsl_ctx *ctx, return add_expr(ctx, 
params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); } +static bool intrinsic_asint(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *data_type; + + data_type = params->args[0]->data_type; + if (data_type->e.numeric.type == HLSL_TYPE_BOOL || data_type->e.numeric.type == HLSL_TYPE_DOUBLE) + { + struct vkd3d_string_buffer *string; + + if ((string = hlsl_type_to_string(ctx, data_type))) + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Wrong argument type of asint(): expected 'int', 'uint', 'float', or 'half', but got '%s'.", + string->buffer); + hlsl_release_string_buffer(ctx, string); + } + data_type = convert_numeric_type(ctx, data_type, HLSL_TYPE_INT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_REINTERPRET, operands, data_type, loc); +} + static bool intrinsic_asuint(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3331,26 +3774,69 @@ static bool intrinsic_exp(struct hlsl_ctx *ctx, if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) return false; - /* 1/ln(2) */ - if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) + /* 1/ln(2) */ + if (!(coeff = hlsl_new_float_constant(ctx, 1.442695f, loc))) + return false; + hlsl_block_add_instr(params->instrs, coeff); + + if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); +} + +static bool intrinsic_exp2(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, 
params->instrs, HLSL_OP1_EXP2, arg, loc); +} + +static bool intrinsic_faceforward(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_function_decl *func; + struct hlsl_type *type; + char *body; + + static const char template[] = + "%s faceforward(%s n, %s i, %s ng)\n" + "{\n" + " return dot(i, ng) < 0 ? n : -n;\n" + "}\n"; + + if (!(type = elementwise_intrinsic_get_common_type(ctx, params, loc))) return false; - hlsl_block_add_instr(params->instrs, coeff); + type = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_FLOAT, type->dimx, type->dimy); - if (!(mul = add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, coeff, params->args[0], loc))) + if (!(body = hlsl_sprintf_alloc(ctx, template, + type->name, type->name, type->name, type->name))) + return false; + func = hlsl_compile_internal_function(ctx, "faceforward", body); + vkd3d_free(body); + if (!func) return false; - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, mul, loc); + return add_user_call(ctx, func, params, loc); } -static bool intrinsic_exp2(struct hlsl_ctx *ctx, +static bool intrinsic_f16tof32(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { - struct hlsl_ir_node *arg; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_type *type; - if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + if (!elementwise_intrinsic_uint_convert_args(ctx, params, loc)) return false; - return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_EXP2, arg, loc); + type = convert_numeric_type(ctx, params->args[0]->data_type, HLSL_TYPE_FLOAT); + + operands[0] = params->args[0]; + return add_expr(ctx, params->instrs, HLSL_OP1_F16TOF32, operands, type, loc); } static bool intrinsic_floor(struct hlsl_ctx *ctx, @@ -3646,12 +4132,12 @@ static bool intrinsic_mul(struct hlsl_ctx *ctx, } else if (vect_count == 1) { - 
assert(matrix_type->dimx == 1 || matrix_type->dimy == 1); + VKD3D_ASSERT(matrix_type->dimx == 1 || matrix_type->dimy == 1); ret_type = hlsl_get_vector_type(ctx, base, matrix_type->dimx * matrix_type->dimy); } else { - assert(matrix_type->dimx == 1 && matrix_type->dimy == 1); + VKD3D_ASSERT(matrix_type->dimx == 1 && matrix_type->dimy == 1); ret_type = hlsl_get_scalar_type(ctx, base); } @@ -3764,6 +4250,17 @@ static bool intrinsic_radians(struct hlsl_ctx *ctx, return !!add_binary_arithmetic_expr(ctx, params->instrs, HLSL_OP2_MUL, arg, rad, loc); } +static bool intrinsic_rcp(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *arg; + + if (!(arg = intrinsic_float_convert_arg(ctx, params, params->args[0], loc))) + return false; + + return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_RCP, arg, loc); +} + static bool intrinsic_reflect(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -3814,7 +4311,7 @@ static bool intrinsic_refract(struct hlsl_ctx *ctx, return false; } - assert(params->args_count == 3); + VKD3D_ASSERT(params->args_count == 3); mut_params = *params; mut_params.args_count = 2; if (!(res_type = elementwise_intrinsic_get_common_type(ctx, &mut_params, loc))) @@ -4032,6 +4529,7 @@ static bool intrinsic_tanh(struct hlsl_ctx *ctx, static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc, const char *name, enum hlsl_sampler_dim dim) { + unsigned int sampler_dim = hlsl_sampler_dim_count(dim); struct hlsl_resource_load_params load_params = { 0 }; const struct hlsl_type *sampler_type; struct hlsl_ir_node *coords, *sample; @@ -4043,11 +4541,6 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * return false; } - if (params->args_count == 4) - { - hlsl_fixme(ctx, loc, "Samples with gradients are not implemented."); - } - 
sampler_type = params->args[0]->data_type; if (sampler_type->class != HLSL_CLASS_SAMPLER || (sampler_type->sampler_dim != dim && sampler_type->sampler_dim != HLSL_SAMPLER_DIM_GENERIC)) @@ -4061,18 +4554,22 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * hlsl_release_string_buffer(ctx, string); } - if (!strcmp(name, "tex2Dlod")) + if (!strcmp(name, "tex2Dbias") + || !strcmp(name, "tex2Dlod")) { struct hlsl_ir_node *lod, *c; - load_params.type = HLSL_RESOURCE_SAMPLE_LOD; + if (!strcmp(name, "tex2Dlod")) + load_params.type = HLSL_RESOURCE_SAMPLE_LOD; + else + load_params.type = HLSL_RESOURCE_SAMPLE_LOD_BIAS; - if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), hlsl_sampler_dim_count(dim), params->args[1], loc))) + if (!(c = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, params->args[1], loc))) return false; hlsl_block_add_instr(params->instrs, c); - if (!(coords = add_implicit_conversion(ctx, params->instrs, c, hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, - hlsl_sampler_dim_count(dim)), loc))) + if (!(coords = add_implicit_conversion(ctx, params->instrs, c, + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) { return false; } @@ -4099,14 +4596,13 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct parse_initializer * if (hlsl_version_ge(ctx, 4, 0)) { - unsigned int count = hlsl_sampler_dim_count(dim); struct hlsl_ir_node *divisor; - if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), count, coords, loc))) + if (!(divisor = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(W, W, W, W), sampler_dim, coords, loc))) return false; hlsl_block_add_instr(params->instrs, divisor); - if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), count, coords, loc))) + if (!(coords = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, Y, Z, W), sampler_dim, coords, loc))) return false; hlsl_block_add_instr(params->instrs, coords); @@ -4120,12 +4616,34 @@ static bool intrinsic_tex(struct hlsl_ctx *ctx, const struct 
parse_initializer * load_params.type = HLSL_RESOURCE_SAMPLE_PROJ; } } + else if (params->args_count == 4) /* Gradient sampling. */ + { + if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } + + if (!(load_params.ddx = add_implicit_conversion(ctx, params->instrs, params->args[2], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } + + if (!(load_params.ddy = add_implicit_conversion(ctx, params->instrs, params->args[3], + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) + { + return false; + } + + load_params.type = HLSL_RESOURCE_SAMPLE_GRAD; + } else { load_params.type = HLSL_RESOURCE_SAMPLE; if (!(coords = add_implicit_conversion(ctx, params->instrs, params->args[1], - hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, hlsl_sampler_dim_count(dim)), loc))) + hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, sampler_dim), loc))) { return false; } @@ -4181,12 +4699,30 @@ static bool intrinsic_tex1D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex1D", HLSL_SAMPLER_DIM_1D); } +static bool intrinsic_tex1Dgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex1Dgrad", HLSL_SAMPLER_DIM_1D); +} + static bool intrinsic_tex2D(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { return intrinsic_tex(ctx, params, loc, "tex2D", HLSL_SAMPLER_DIM_2D); } +static bool intrinsic_tex2Dbias(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex2Dbias", HLSL_SAMPLER_DIM_2D); +} + +static bool intrinsic_tex2Dgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex2Dgrad", 
HLSL_SAMPLER_DIM_2D); +} + static bool intrinsic_tex2Dlod(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4205,6 +4741,12 @@ static bool intrinsic_tex3D(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "tex3D", HLSL_SAMPLER_DIM_3D); } +static bool intrinsic_tex3Dgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "tex3Dgrad", HLSL_SAMPLER_DIM_3D); +} + static bool intrinsic_tex3Dproj(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4217,6 +4759,12 @@ static bool intrinsic_texCUBE(struct hlsl_ctx *ctx, return intrinsic_tex(ctx, params, loc, "texCUBE", HLSL_SAMPLER_DIM_CUBE); } +static bool intrinsic_texCUBEgrad(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + return intrinsic_tex(ctx, params, loc, "texCUBEgrad", HLSL_SAMPLER_DIM_CUBE); +} + static bool intrinsic_texCUBEproj(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4336,6 +4884,20 @@ static bool intrinsic_d3dcolor_to_ubyte4(struct hlsl_ctx *ctx, return true; } +static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *expr; + + if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT, + operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc))) + return false; + hlsl_block_add_instr(params->instrs, expr); + + return true; +} + static const struct intrinsic_function { const char *name; @@ -4348,12 +4910,14 @@ intrinsic_functions[] = { /* Note: these entries should be kept in alphabetical order. 
*/ {"D3DCOLORtoUBYTE4", 1, true, intrinsic_d3dcolor_to_ubyte4}, + {"GetRenderTargetSampleCount", 0, true, intrinsic_GetRenderTargetSampleCount}, {"abs", 1, true, intrinsic_abs}, {"acos", 1, true, intrinsic_acos}, {"all", 1, true, intrinsic_all}, {"any", 1, true, intrinsic_any}, {"asfloat", 1, true, intrinsic_asfloat}, {"asin", 1, true, intrinsic_asin}, + {"asint", 1, true, intrinsic_asint}, {"asuint", -1, true, intrinsic_asuint}, {"atan", 1, true, intrinsic_atan}, {"atan2", 2, true, intrinsic_atan2}, @@ -4375,6 +4939,8 @@ intrinsic_functions[] = {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, {"exp2", 1, true, intrinsic_exp2}, + {"f16tof32", 1, true, intrinsic_f16tof32}, + {"faceforward", 3, true, intrinsic_faceforward}, {"floor", 1, true, intrinsic_floor}, {"fmod", 2, true, intrinsic_fmod}, {"frac", 1, true, intrinsic_frac}, @@ -4392,6 +4958,7 @@ intrinsic_functions[] = {"normalize", 1, true, intrinsic_normalize}, {"pow", 2, true, intrinsic_pow}, {"radians", 1, true, intrinsic_radians}, + {"rcp", 1, true, intrinsic_rcp}, {"reflect", 2, true, intrinsic_reflect}, {"refract", 3, true, intrinsic_refract}, {"round", 1, true, intrinsic_round}, @@ -4406,12 +4973,17 @@ intrinsic_functions[] = {"tan", 1, true, intrinsic_tan}, {"tanh", 1, true, intrinsic_tanh}, {"tex1D", -1, false, intrinsic_tex1D}, + {"tex1Dgrad", 4, false, intrinsic_tex1Dgrad}, {"tex2D", -1, false, intrinsic_tex2D}, + {"tex2Dbias", 2, false, intrinsic_tex2Dbias}, + {"tex2Dgrad", 4, false, intrinsic_tex2Dgrad}, {"tex2Dlod", 2, false, intrinsic_tex2Dlod}, {"tex2Dproj", 2, false, intrinsic_tex2Dproj}, {"tex3D", -1, false, intrinsic_tex3D}, + {"tex3Dgrad", 4, false, intrinsic_tex3Dgrad}, {"tex3Dproj", 2, false, intrinsic_tex3Dproj}, {"texCUBE", -1, false, intrinsic_texCUBE}, + {"texCUBEgrad", 4, false, intrinsic_texCUBEgrad}, {"texCUBEproj", 2, false, intrinsic_texCUBEproj}, {"transpose", 1, true, intrinsic_transpose}, {"trunc", 1, true, intrinsic_trunc}, @@ -4599,7 +5171,7 @@ static bool 
add_ternary(struct hlsl_ctx *ctx, struct hlsl_block *block, common_type = first->data_type; } - assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); args[0] = cond; args[1] = first; @@ -5481,6 +6053,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_BREAK %token KW_BUFFER %token KW_CASE +%token KW_CONSTANTBUFFER %token KW_CBUFFER %token KW_CENTROID %token KW_COLUMN_MAJOR @@ -5513,6 +6086,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_NAMESPACE %token KW_NOINTERPOLATION %token KW_NOPERSPECTIVE +%token KW_NULL %token KW_OUT %token KW_PACKOFFSET %token KW_PASS @@ -5566,6 +6140,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %token KW_TEXTURECUBEARRAY %token KW_TRUE %token KW_TYPEDEF +%token KW_UNSIGNED %token KW_UNIFORM %token KW_VECTOR %token KW_VERTEXSHADER @@ -5670,6 +6245,8 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %type if_body +%type array + %type var_modifiers %type any_identifier @@ -5678,6 +6255,7 @@ static bool state_block_add_entry(struct hlsl_state_block *state_block, struct h %type name_opt %type parameter +%type parameter_decl %type param_list %type parameters @@ -5717,8 +6295,7 @@ hlsl_prog: | hlsl_prog buffer_declaration buffer_body | hlsl_prog declaration_statement { - if (!list_empty(&$2->instrs)) - hlsl_fixme(ctx, &@2, "Uniform initializer."); + hlsl_block_add_block(&ctx->static_initializers, $2); destroy_block($2); } | hlsl_prog preproc_directive @@ -5742,19 +6319,31 @@ pass: annotations_list: variables_def_typed ';' + { + struct hlsl_block *block; + + block = initialize_vars(ctx, $1); + destroy_block(block); + } | annotations_list variables_def_typed ';' + { + struct hlsl_block *block; + + block = initialize_vars(ctx, $2); + destroy_block(block); + } annotations_opt: %empty { $$ = NULL; } - | '<' 
scope_start '>' + | '<' annotations_scope_start '>' { hlsl_pop_scope(ctx); $$ = NULL; } - | '<' scope_start annotations_list '>' + | '<' annotations_scope_start annotations_list '>' { struct hlsl_scope *scope = ctx->cur_scope; @@ -6083,7 +6672,7 @@ func_declaration: if (!$1.first) { - assert(decl->parameters.count == $1.parameters.count); + VKD3D_ASSERT(decl->parameters.count == $1.parameters.count); for (i = 0; i < $1.parameters.count; ++i) { @@ -6198,7 +6787,7 @@ func_prototype_no_attrs: * brittle and ugly. */ - assert($5.count == params->count); + VKD3D_ASSERT($5.count == params->count); for (i = 0; i < params->count; ++i) { struct hlsl_ir_var *orig_param = params->vars[i]; @@ -6282,6 +6871,13 @@ switch_scope_start: ctx->cur_scope->_switch = true; } +annotations_scope_start: + %empty + { + hlsl_push_scope(ctx); + ctx->cur_scope->annotations = true; + } + var_identifier: VAR_IDENTIFIER | NEW_IDENTIFIER @@ -6315,6 +6911,9 @@ semantic: { char *p; + if (!($$.raw_name = hlsl_strdup(ctx, $2))) + YYABORT; + for (p = $2 + strlen($2); p > $2 && isdigit(p[-1]); --p) ; $$.name = $2; @@ -6330,22 +6929,34 @@ register_reservation: ':' KW_REGISTER '(' any_identifier ')' { memset(&$$, 0, sizeof($$)); - if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) + if (!parse_reservation_index(ctx, $4, 0, &$$)) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + + vkd3d_free($4); + } + | ':' KW_REGISTER '(' any_identifier '[' expr ']' ')' + { + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) + { hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Invalid register reservation '%s'.", $4); + } vkd3d_free($4); + vkd3d_free($6); } | ':' KW_REGISTER '(' any_identifier ',' any_identifier ')' { memset(&$$, 0, sizeof($$)); - if (parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) + if (parse_reservation_index(ctx, $6, 0, &$$)) 
{ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); } else if (parse_reservation_space($6, &$$.reg_space)) { - if (!parse_reservation_index($4, &$$.reg_type, &$$.reg_index)) + if (!parse_reservation_index(ctx, $4, 0, &$$)) hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Invalid register reservation '%s'.", $4); } @@ -6358,12 +6969,45 @@ register_reservation: vkd3d_free($4); vkd3d_free($6); } + | ':' KW_REGISTER '(' any_identifier '[' expr ']' ',' any_identifier ')' + { + memset(&$$, 0, sizeof($$)); + + if (!parse_reservation_space($9, &$$.reg_space)) + hlsl_error(ctx, &@9, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register space reservation '%s'.", $9); + + if (!parse_reservation_index(ctx, $4, evaluate_static_expression_as_uint(ctx, $6, &@6), &$$)) + { + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $4); + } + + vkd3d_free($4); + vkd3d_free($6); + vkd3d_free($9); + } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ')' + { + hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) + { + hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $6); + } + + vkd3d_free($4); + vkd3d_free($6); + vkd3d_free($8); + } | ':' KW_REGISTER '(' any_identifier ',' any_identifier ',' any_identifier ')' { hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); memset(&$$, 0, sizeof($$)); - if (!parse_reservation_index($6, &$$.reg_type, &$$.reg_index)) + if (!parse_reservation_index(ctx, $6, 0, &$$)) hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, "Invalid register reservation '%s'.", $6); @@ -6375,6 +7019,26 @@ register_reservation: vkd3d_free($6); vkd3d_free($8); } + | ':' KW_REGISTER '(' any_identifier ',' any_identifier '[' expr ']' ',' any_identifier ')' + { 
+ hlsl_fixme(ctx, &@4, "Reservation shader target %s.", $4); + + memset(&$$, 0, sizeof($$)); + if (!parse_reservation_index(ctx, $6, evaluate_static_expression_as_uint(ctx, $8, &@8), &$$)) + { + hlsl_error(ctx, &@6, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register reservation '%s'.", $6); + } + + if (!parse_reservation_space($11, &$$.reg_space)) + hlsl_error(ctx, &@11, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Invalid register space reservation '%s'.", $11); + + vkd3d_free($4); + vkd3d_free($6); + vkd3d_free($8); + vkd3d_free($11); + } packoffset_reservation: ':' KW_PACKOFFSET '(' any_identifier ')' @@ -6427,6 +7091,14 @@ param_list: } parameter: + parameter_decl + | parameter_decl '=' complex_initializer + { + $$ = $1; + $$.initializer = $3; + } + +parameter_decl: var_modifiers type_no_void any_identifier arrays colon_attribute { uint32_t modifiers = $1; @@ -6449,11 +7121,18 @@ parameter: } type = hlsl_new_array_type(ctx, type, $4.sizes[i]); } + vkd3d_free($4.sizes); + $$.type = type; + if (hlsl_version_ge(ctx, 5, 1) && type->class == HLSL_CLASS_ARRAY && hlsl_type_is_resource(type)) + hlsl_fixme(ctx, &@2, "Shader model 5.1+ resource array."); + $$.name = $3; $$.semantic = $5.semantic; $$.reg_reservation = $5.reg_reservation; + + memset(&$$.initializer, 0, sizeof($$.initializer)); } texture_type: @@ -6662,12 +7341,6 @@ type_no_void: { validate_texture_format_type(ctx, $3, &@3); - if (hlsl_version_lt(ctx, 4, 1)) - { - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, - "Multisampled texture object declaration needs sample count for profile %s.", ctx->profile->name); - } - $$ = hlsl_new_texture_type(ctx, $1, $3, 0); } | texture_ms_type '<' type ',' shift_expr '>' @@ -6696,6 +7369,10 @@ type_no_void: validate_uav_type(ctx, $1, $3, &@3); $$ = hlsl_new_uav_type(ctx, $1, $3, true); } + | KW_STRING + { + $$ = ctx->builtin_types.string; + } | TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $1, true, true); @@ -6713,6 +7390,26 @@ 
type_no_void: } vkd3d_free($1); } + | KW_UNSIGNED TYPE_IDENTIFIER + { + struct hlsl_type *type = hlsl_get_type(ctx->cur_scope, $2, true, true); + + if (hlsl_is_numeric_type(type) && type->e.numeric.type == HLSL_TYPE_INT) + { + if (!(type = hlsl_type_clone(ctx, type, 0, 0))) + YYABORT; + vkd3d_free((void *)type->name); + type->name = NULL; + type->e.numeric.type = HLSL_TYPE_UINT; + } + else + { + hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "The 'unsigned' keyword can't be used with type %s.", $2); + } + + $$ = type; + } | KW_STRUCT TYPE_IDENTIFIER { $$ = hlsl_get_type(ctx->cur_scope, $2, true, true); @@ -6724,6 +7421,10 @@ type_no_void: { $$ = hlsl_get_type(ctx->cur_scope, "RenderTargetView", true, true); } + | KW_DEPTHSTENCILSTATE + { + $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilState", true, true); + } | KW_DEPTHSTENCILVIEW { $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); @@ -6736,6 +7437,37 @@ type_no_void: { $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); } + | KW_COMPUTESHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "ComputeShader", true, true); + } + | KW_DOMAINSHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "DomainShader", true, true); + } + | KW_HULLSHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "HullShader", true, true); + } + | KW_GEOMETRYSHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "GeometryShader", true, true); + } + | KW_CONSTANTBUFFER '<' type '>' + { + if ($3->class != HLSL_CLASS_STRUCT) + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "ConstantBuffer<...> requires user-defined structure type."); + $$ = hlsl_new_cb_type(ctx, $3); + } + | KW_RASTERIZERSTATE + { + $$ = hlsl_get_type(ctx->cur_scope, "RasterizerState", true, true); + } + | KW_BLENDSTATE + { + $$ = hlsl_get_type(ctx->cur_scope, "BlendState", true, true); + } type: type_no_void @@ -6840,10 +7572,10 @@ variables_def_typed: { struct parse_variable_def *head_def; - assert(!list_empty($1)); + 
VKD3D_ASSERT(!list_empty($1)); head_def = LIST_ENTRY(list_head($1), struct parse_variable_def, entry); - assert(head_def->basic_type); + VKD3D_ASSERT(head_def->basic_type); $3->basic_type = head_def->basic_type; $3->modifiers = head_def->modifiers; $3->modifiers_loc = head_def->modifiers_loc; @@ -6855,7 +7587,7 @@ variables_def_typed: } variable_decl: - any_identifier arrays colon_attribute + any_identifier arrays colon_attribute annotations_opt { $$ = hlsl_alloc(ctx, sizeof(*$$)); $$->loc = @1; @@ -6863,6 +7595,7 @@ variable_decl: $$->arrays = $2; $$->semantic = $3.semantic; $$->reg_reservation = $3.reg_reservation; + $$->annotations = $4; } state_block_start: @@ -6932,6 +7665,34 @@ state_block: hlsl_src_from_node(&entry->args[i], $5.args[i]); vkd3d_free($5.args); + $$ = $1; + state_block_add_entry($$, entry); + } + | state_block any_identifier '(' func_arguments ')' ';' + { + struct hlsl_state_block_entry *entry; + unsigned int i; + + if (!(entry = hlsl_alloc(ctx, sizeof(*entry)))) + YYABORT; + + entry->is_function_call = true; + + entry->name = $2; + entry->lhs_has_index = false; + entry->lhs_index = 0; + + entry->instrs = $4.instrs; + + entry->args_count = $4.args_count; + if (!(entry->args = hlsl_alloc(ctx, sizeof(*entry->args) * entry->args_count))) + YYABORT; + for (i = 0; i < entry->args_count; ++i) + hlsl_src_from_node(&entry->args[i], $4.args[i]); + vkd3d_free($4.args); + + hlsl_validate_state_block_entry(ctx, entry, &@4); + $$ = $1; state_block_add_entry($$, entry); } @@ -7020,52 +7781,43 @@ variable_def_typed: $$->modifiers_loc = @1; } -arrays: - %empty +array: + '[' ']' { - $$.sizes = NULL; - $$.count = 0; + $$ = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; } - | '[' expr ']' arrays + | '[' expr ']' { - uint32_t *new_array; - unsigned int size; - - size = evaluate_static_expression_as_uint(ctx, $2, &@2); - - destroy_block($2); - - $$ = $4; + $$ = evaluate_static_expression_as_uint(ctx, $2, &@2); - if (!size) + if (!$$) { hlsl_error(ctx, &@2, 
VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, "Array size is not a positive integer constant."); - vkd3d_free($$.sizes); YYABORT; } - if (size > 65536) + if ($$ > 65536) { hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE, - "Array size %u is not between 1 and 65536.", size); - vkd3d_free($$.sizes); + "Array size %u is not between 1 and 65536.", $$); YYABORT; } - if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) - { - vkd3d_free($$.sizes); - YYABORT; - } - $$.sizes = new_array; - $$.sizes[$$.count++] = size; + destroy_block($2); + } + +arrays: + %empty + { + $$.sizes = NULL; + $$.count = 0; } - | '[' ']' arrays + | array arrays { uint32_t *new_array; - $$ = $3; + $$ = $2; if (!(new_array = hlsl_realloc(ctx, $$.sizes, ($$.count + 1) * sizeof(*new_array)))) { @@ -7074,7 +7826,7 @@ arrays: } $$.sizes = new_array; - $$.sizes[$$.count++] = HLSL_ARRAY_ELEMENTS_COUNT_IMPLICIT; + $$.sizes[$$.count++] = $1; } var_modifiers: @@ -7156,6 +7908,8 @@ var_modifiers: } | var_identifier var_modifiers { + $$ = $2; + if (!strcmp($1, "precise")) $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); else if (!strcmp($1, "single")) @@ -7587,6 +8341,35 @@ primary_expr: YYABORT; } } + | STRING + { + struct hlsl_ir_node *c; + + if (!(c = hlsl_new_string_constant(ctx, $1, &@1))) + { + vkd3d_free($1); + YYABORT; + } + vkd3d_free($1); + + if (!($$ = make_block(ctx, c))) + { + hlsl_free_instr(c); + YYABORT; + } + } + | KW_NULL + { + struct hlsl_ir_node *c; + + if (!(c = hlsl_new_null_constant(ctx, &@1))) + YYABORT; + if (!($$ = make_block(ctx, c))) + { + hlsl_free_instr(c); + YYABORT; + } + } | VAR_IDENTIFIER { struct hlsl_ir_load *load; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index bdb72a1fab9..a695eefabf6 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -20,6 +20,7 @@ #include "hlsl.h" #include +#include /* TODO: remove when no 
longer needed, only used for new_offset_instr_from_deref() */ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block, @@ -52,7 +53,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str if (regset == HLSL_REGSET_NUMERIC) { - assert(size % 4 == 0); + VKD3D_ASSERT(size % 4 == 0); size /= 4; } @@ -75,7 +76,7 @@ static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, str if (regset == HLSL_REGSET_NUMERIC) { - assert(*offset_component == 0); + VKD3D_ASSERT(*offset_component == 0); *offset_component = field_offset % 4; field_offset /= 4; } @@ -120,7 +121,7 @@ static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, st return NULL; hlsl_block_add_instr(block, offset); - assert(deref->var); + VKD3D_ASSERT(deref->var); type = deref->var->data_type; for (i = 0; i < deref->path_len; ++i) @@ -153,8 +154,8 @@ static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_der struct hlsl_block block; struct hlsl_type *type; - assert(deref->var); - assert(!hlsl_deref_is_lowered(deref)); + VKD3D_ASSERT(deref->var); + VKD3D_ASSERT(!hlsl_deref_is_lowered(deref)); type = hlsl_deref_get_type(ctx, deref); @@ -218,6 +219,14 @@ static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, uniform->is_uniform = 1; uniform->is_param = temp->is_param; uniform->buffer = temp->buffer; + if (temp->default_values) + { + /* Transfer default values from the temp to the uniform. 
*/ + VKD3D_ASSERT(!uniform->default_values); + VKD3D_ASSERT(hlsl_type_component_count(temp->data_type) == hlsl_type_component_count(uniform->data_type)); + uniform->default_values = temp->default_values; + temp->default_values = NULL; + } if (!(new_name = hlsl_sprintf_alloc(ctx, "", temp->name))) return; @@ -312,7 +321,7 @@ static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir } } - if (!(new_semantic.name = hlsl_strdup(ctx, semantic->name))) + if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic))) { vkd3d_free(new_name); return NULL; @@ -390,7 +399,7 @@ static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s } else { - assert(i == 0); + VKD3D_ASSERT(i == 0); if (!(store = hlsl_new_store_index(ctx, &lhs->src, NULL, cast, 0, &var->loc))) return; @@ -523,7 +532,7 @@ static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, s } else { - assert(i == 0); + VKD3D_ASSERT(i == 0); if (!(load = hlsl_new_load_index(ctx, &rhs->src, NULL, &var->loc))) return; @@ -918,7 +927,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun if (return_instr) { /* If we're in a loop, we should have used "break" instead. */ - assert(!in_loop); + VKD3D_ASSERT(!in_loop); /* Iterate in reverse, to avoid use-after-free when unlinking sources from * the "uses" list. */ @@ -940,7 +949,7 @@ static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *fun struct hlsl_ir_load *load; /* If we're in a loop, we should have used "break" instead. 
*/ - assert(!in_loop); + VKD3D_ASSERT(!in_loop); if (tail == &cf_instr->entry) return has_early_return; @@ -999,7 +1008,7 @@ static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct h struct hlsl_deref coords_deref; struct hlsl_ir_var *coords; - assert(dim_count < 4); + VKD3D_ASSERT(dim_count < 4); if (!(coords = hlsl_new_synthetic_var(ctx, "coords", hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc))) @@ -1100,9 +1109,9 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_resource_load_params params = {0}; struct hlsl_ir_node *resource_load; - assert(coords->data_type->class == HLSL_CLASS_VECTOR); - assert(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(coords->data_type->dimx == dim_count); + VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR); + VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT); + VKD3D_ASSERT(coords->data_type->dimx == dim_count); if (!(coords = add_zero_mipmap_level(ctx, coords, &instr->loc))) return false; @@ -1132,7 +1141,7 @@ static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_deref row_deref; unsigned int i; - assert(!hlsl_type_is_row_major(mat->data_type)); + VKD3D_ASSERT(!hlsl_type_is_row_major(mat->data_type)); if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc))) return false; @@ -1369,7 +1378,7 @@ static struct copy_propagation_value *copy_propagation_get_value(const struct co unsigned int component_count = hlsl_type_component_count(var->data_type); struct copy_propagation_value *value; - assert(component < component_count); + VKD3D_ASSERT(component < component_count); value = copy_propagation_get_value_at_time(&var_def->traces[component], time); if (!value) @@ -1402,7 +1411,7 @@ static struct copy_propagation_var_def *copy_propagation_create_var_def(struct h var_def->var = var; res = rb_put(&state->var_defs, var, &var_def->entry); - assert(!res); + 
VKD3D_ASSERT(!res); return var_def; } @@ -1411,7 +1420,7 @@ static void copy_propagation_trace_record_value(struct hlsl_ctx *ctx, struct copy_propagation_component_trace *trace, struct hlsl_ir_node *node, unsigned int component, unsigned int time) { - assert(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); + VKD3D_ASSERT(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time); if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity, trace->record_count + 1, sizeof(trace->records[0]))) @@ -1440,7 +1449,7 @@ static void copy_propagation_invalidate_variable(struct hlsl_ctx *ctx, struct co /* Don't add an invalidate record if it is already present. */ if (trace->record_count && trace->records[trace->record_count - 1].timestamp == time) { - assert(!trace->records[trace->record_count - 1].node); + VKD3D_ASSERT(!trace->records[trace->record_count - 1].node); continue; } @@ -1623,27 +1632,36 @@ static bool copy_propagation_transform_load(struct hlsl_ctx *ctx, switch (type->class) { + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_SCALAR: case HLSL_CLASS_VECTOR: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_SAMPLER: + case HLSL_CLASS_STRING: case HLSL_CLASS_TEXTURE: case HLSL_CLASS_UAV: case HLSL_CLASS_VERTEX_SHADER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_RENDER_TARGET_VIEW: + case HLSL_CLASS_DEPTH_STENCIL_VIEW: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: break; case HLSL_CLASS_MATRIX: case HLSL_CLASS_ARRAY: case HLSL_CLASS_STRUCT: + case HLSL_CLASS_CONSTANT_BUFFER: /* FIXME: Actually we shouldn't even get here, but we don't split * matrices yet. 
*/ return false; - case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_PASS: - case HLSL_CLASS_RENDER_TARGET_VIEW: - case HLSL_CLASS_STRING: case HLSL_CLASS_TECHNIQUE: case HLSL_CLASS_VOID: vkd3d_unreachable(); @@ -1685,11 +1703,11 @@ static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx, if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count)) return false; - assert(count == 1); + VKD3D_ASSERT(count == 1); if (!(value = copy_propagation_get_value(state, deref->var, start, time))) return false; - assert(value->component == 0); + VKD3D_ASSERT(value->component == 0); /* Only HLSL_IR_LOAD can produce an object. */ load = hlsl_ir_load(value->node); @@ -1970,6 +1988,76 @@ bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *bloc return progress; } +enum validation_result +{ + DEREF_VALIDATION_OK, + DEREF_VALIDATION_OUT_OF_BOUNDS, + DEREF_VALIDATION_NOT_CONSTANT, +}; + +static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx, + const struct hlsl_deref *deref) +{ + struct hlsl_type *type = deref->var->data_type; + unsigned int i; + + for (i = 0; i < deref->path_len; ++i) + { + struct hlsl_ir_node *path_node = deref->path[i].node; + unsigned int idx = 0; + + VKD3D_ASSERT(path_node); + if (path_node->type != HLSL_IR_CONSTANT) + return DEREF_VALIDATION_NOT_CONSTANT; + + /* We should always have generated a cast to UINT. */ + VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR + && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); + + idx = hlsl_ir_constant(path_node)->value.u[0].u; + + switch (type->class) + { + case HLSL_CLASS_VECTOR: + if (idx >= type->dimx) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Vector index is out of bounds. 
%u/%u", idx, type->dimx); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_MATRIX: + if (idx >= hlsl_type_major_size(type)) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_ARRAY: + if (idx >= type->e.array.elements_count) + { + hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); + return DEREF_VALIDATION_OUT_OF_BOUNDS; + } + break; + + case HLSL_CLASS_STRUCT: + break; + + default: + vkd3d_unreachable(); + } + + type = hlsl_get_element_type_from_path_index(ctx, type, path_node); + } + + return DEREF_VALIDATION_OK; +} + static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, const char *usage) { @@ -1979,7 +2067,7 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct { struct hlsl_ir_node *path_node = deref->path[i].node; - assert(path_node); + VKD3D_ASSERT(path_node); if (path_node->type != HLSL_IR_CONSTANT) hlsl_note(ctx, &path_node->loc, VKD3D_SHADER_LOG_ERROR, "Expression for %s within \"%s\" cannot be resolved statically.", @@ -1987,60 +2075,77 @@ static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct } } -static bool validate_static_object_references(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, +static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { - unsigned int start, count; - - if (instr->type == HLSL_IR_RESOURCE_LOAD) + switch (instr->type) { - struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); - - if (!load->resource.var->is_uniform) + case HLSL_IR_RESOURCE_LOAD: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource must have a single 
uniform source."); + struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr); + + if (!load->resource.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource must have a single uniform source."); + } + else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Loaded resource from \"%s\" must be determinable at compile time.", + load->resource.var->name); + note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); + } + + if (load->sampler.var) + { + if (!load->sampler.var->is_uniform) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler must have a single uniform source."); + } + else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT) + { + hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, + "Resource load sampler from \"%s\" must be determinable at compile time.", + load->sampler.var->name); + note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); + } + } + break; } - else if (!hlsl_component_index_range_from_deref(ctx, &load->resource, &start, &count)) + case HLSL_IR_RESOURCE_STORE: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Loaded resource from \"%s\" must be determinable at compile time.", - load->resource.var->name); - note_non_static_deref_expressions(ctx, &load->resource, "loaded resource"); - } + struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - if (load->sampler.var) - { - if (!load->sampler.var->is_uniform) + if (!store->resource.var->is_uniform) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler must have a single uniform source."); + "Accessed resource must have a 
single uniform source."); } - else if (!hlsl_component_index_range_from_deref(ctx, &load->sampler, &start, &count)) + else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT) { hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Resource load sampler from \"%s\" must be determinable at compile time.", - load->sampler.var->name); - note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler"); + "Accessed resource from \"%s\" must be determinable at compile time.", + store->resource.var->name); + note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); } + break; } - } - else if (instr->type == HLSL_IR_RESOURCE_STORE) - { - struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr); - - if (!store->resource.var->is_uniform) + case HLSL_IR_LOAD: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource must have a single uniform source."); + struct hlsl_ir_load *load = hlsl_ir_load(instr); + validate_component_index_range_from_deref(ctx, &load->src); + break; } - else if (!hlsl_component_index_range_from_deref(ctx, &store->resource, &start, &count)) + case HLSL_IR_STORE: { - hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF, - "Accessed resource from \"%s\" must be determinable at compile time.", - store->resource.var->name); - note_non_static_deref_expressions(ctx, &store->resource, "accessed resource"); + struct hlsl_ir_store *store = hlsl_ir_store(instr); + validate_component_index_range_from_deref(ctx, &store->lhs); + break; } + default: + break; } return false; @@ -2436,7 +2541,7 @@ static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir return false; deref = &hlsl_ir_load(instr)->src; - assert(deref->var); + VKD3D_ASSERT(deref->var); if (deref->path_len == 0) return false; @@ -2510,7 +2615,7 @@ static bool 
validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc return false; deref = &hlsl_ir_store(instr)->lhs; - assert(deref->var); + VKD3D_ASSERT(deref->var); if (deref->path_len == 0) return false; @@ -2531,6 +2636,124 @@ static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struc return false; } +/* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant + * index into multiple constant loads, where the value of only one of them ends up in the resulting + * node. + * This is achieved through a synthetic variable. The non-constant index is compared for equality + * with every possible value it can have within the array bounds, and the ternary operator is used + * to update the value of the synthetic var when the equality check passes. */ +static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_block *block) +{ + struct hlsl_constant_value zero_value = {0}; + struct hlsl_ir_node *cut_index, *zero, *store; + unsigned int i, i_cut, element_count; + const struct hlsl_deref *deref; + struct hlsl_type *cut_type; + struct hlsl_ir_load *load; + struct hlsl_ir_var *var; + bool row_major; + + if (instr->type != HLSL_IR_LOAD) + return false; + load = hlsl_ir_load(instr); + deref = &load->src; + + if (deref->path_len == 0) + return false; + + for (i = deref->path_len - 1; ; --i) + { + if (deref->path[i].node->type != HLSL_IR_CONSTANT) + { + i_cut = i; + break; + } + + if (i == 0) + return false; + } + + cut_index = deref->path[i_cut].node; + cut_type = deref->var->data_type; + for (i = 0; i < i_cut; ++i) + cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node); + + row_major = hlsl_type_is_row_major(cut_type); + VKD3D_ASSERT(cut_type->class == HLSL_CLASS_ARRAY || row_major); + + if (!(var = hlsl_new_synthetic_var(ctx, row_major ? 
"row_major-load" : "array-load", instr->data_type, &instr->loc))) + return false; + + if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + if (!(store = hlsl_new_simple_store(ctx, var, zero))) + return false; + hlsl_block_add_instr(block, store); + + TRACE("Lowering non-constant %s load on variable '%s'.\n", row_major ? "row_major" : "array", deref->var->name); + + element_count = hlsl_type_element_count(cut_type); + for (i = 0; i < element_count; ++i) + { + struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL); + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *const_i, *equals, *ternary, *var_store; + struct hlsl_ir_load *var_load, *specific_load; + struct hlsl_deref deref_copy = {0}; + + if (!(const_i = hlsl_new_uint_constant(ctx, i, &cut_index->loc))) + return false; + hlsl_block_add_instr(block, const_i); + + operands[0] = cut_index; + operands[1] = const_i; + if (!(equals = hlsl_new_expr(ctx, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc))) + return false; + hlsl_block_add_instr(block, equals); + + if (!(equals = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), var->data_type->dimx, equals, &cut_index->loc))) + return false; + hlsl_block_add_instr(block, equals); + + if (!(var_load = hlsl_new_var_load(ctx, var, &cut_index->loc))) + return false; + hlsl_block_add_instr(block, &var_load->node); + + if (!hlsl_copy_deref(ctx, &deref_copy, deref)) + return false; + hlsl_src_remove(&deref_copy.path[i_cut]); + hlsl_src_from_node(&deref_copy.path[i_cut], const_i); + + if (!(specific_load = hlsl_new_load_index(ctx, &deref_copy, NULL, &cut_index->loc))) + { + hlsl_cleanup_deref(&deref_copy); + return false; + } + hlsl_block_add_instr(block, &specific_load->node); + + hlsl_cleanup_deref(&deref_copy); + + operands[0] = equals; + operands[1] = &specific_load->node; + operands[2] = &var_load->node; + if (!(ternary = hlsl_new_expr(ctx, 
HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc))) + return false; + hlsl_block_add_instr(block, ternary); + + if (!(var_store = hlsl_new_simple_store(ctx, var, ternary))) + return false; + hlsl_block_add_instr(block, var_store); + } + + if (!(load = hlsl_new_var_load(ctx, var, &instr->loc))) + return false; + hlsl_block_add_instr(block, &load->node); + + return true; +} /* Lower combined samples and sampler variables to synthesized separated textures and samplers. * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) @@ -2554,11 +2777,11 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in case HLSL_RESOURCE_RESINFO: case HLSL_RESOURCE_SAMPLE_CMP: case HLSL_RESOURCE_SAMPLE_CMP_LZ: - case HLSL_RESOURCE_SAMPLE_GRAD: case HLSL_RESOURCE_SAMPLE_INFO: return false; case HLSL_RESOURCE_SAMPLE: + case HLSL_RESOURCE_SAMPLE_GRAD: case HLSL_RESOURCE_SAMPLE_LOD: case HLSL_RESOURCE_SAMPLE_LOD_BIAS: case HLSL_RESOURCE_SAMPLE_PROJ: @@ -2573,7 +2796,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in return false; } - assert(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); + VKD3D_ASSERT(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS); if (!(name = hlsl_get_string_buffer(ctx))) return false; @@ -2590,7 +2813,7 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *in struct hlsl_type *arr_type = load->resource.var->data_type; for (i = 0; i < load->resource.path_len; ++i) { - assert(arr_type->class == HLSL_CLASS_ARRAY); + VKD3D_ASSERT(arr_type->class == HLSL_CLASS_ARRAY); texture_array_type = hlsl_new_array_type(ctx, texture_array_type, arr_type->e.array.elements_count); arr_type = arr_type->e.array.type; } @@ -2619,8 +2842,8 @@ static bool lower_combined_samples(struct hlsl_ctx *ctx, struct 
hlsl_ir_node *in hlsl_copy_deref(ctx, &load->sampler, &load->resource); load->resource.var = var; - assert(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); - assert(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); + VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE); + VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER); return true; } @@ -2918,6 +3141,108 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return true; } +/* Lower SIN/COS to SINCOS for SM1. */ +static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *arg, *half, *two_pi, *reciprocal_two_pi, *neg_pi; + struct hlsl_constant_value half_value, two_pi_value, reciprocal_two_pi_value, neg_pi_value; + struct hlsl_ir_node *mad, *frc, *reduced; + struct hlsl_type *type; + struct hlsl_ir_expr *expr; + enum hlsl_ir_expr_op op; + struct hlsl_ir_node *sincos; + int i; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + + if (expr->op == HLSL_OP1_SIN) + op = HLSL_OP1_SIN_REDUCED; + else if (expr->op == HLSL_OP1_COS) + op = HLSL_OP1_COS_REDUCED; + else + return false; + + arg = expr->operands[0].node; + type = arg->data_type; + + /* Reduce the range of the input angles to [-pi, pi]. 
*/ + for (i = 0; i < type->dimx; ++i) + { + half_value.u[i].f = 0.5; + two_pi_value.u[i].f = 2.0 * M_PI; + reciprocal_two_pi_value.u[i].f = 1.0 / (2.0 * M_PI); + neg_pi_value.u[i].f = -M_PI; + } + + if (!(half = hlsl_new_constant(ctx, type, &half_value, &instr->loc)) + || !(two_pi = hlsl_new_constant(ctx, type, &two_pi_value, &instr->loc)) + || !(reciprocal_two_pi = hlsl_new_constant(ctx, type, &reciprocal_two_pi_value, &instr->loc)) + || !(neg_pi = hlsl_new_constant(ctx, type, &neg_pi_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, half); + hlsl_block_add_instr(block, two_pi); + hlsl_block_add_instr(block, reciprocal_two_pi); + hlsl_block_add_instr(block, neg_pi); + + if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half))) + return false; + hlsl_block_add_instr(block, mad); + if (!(frc = hlsl_new_unary_expr(ctx, HLSL_OP1_FRACT, mad, &instr->loc))) + return false; + hlsl_block_add_instr(block, frc); + if (!(reduced = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, frc, two_pi, neg_pi))) + return false; + hlsl_block_add_instr(block, reduced); + + if (type->dimx == 1) + { + if (!(sincos = hlsl_new_unary_expr(ctx, op, reduced, &instr->loc))) + return false; + hlsl_block_add_instr(block, sincos); + } + else + { + struct hlsl_ir_node *comps[4] = {0}; + struct hlsl_ir_var *var; + struct hlsl_deref var_deref; + struct hlsl_ir_load *var_load; + + for (i = 0; i < type->dimx; ++i) + { + uint32_t s = hlsl_swizzle_from_writemask(1 << i); + + if (!(comps[i] = hlsl_new_swizzle(ctx, s, 1, reduced, &instr->loc))) + return false; + hlsl_block_add_instr(block, comps[i]); + } + + if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc))) + return false; + hlsl_init_simple_deref_from_var(&var_deref, var); + + for (i = 0; i < type->dimx; ++i) + { + struct hlsl_block store_block; + + if (!(sincos = hlsl_new_unary_expr(ctx, op, comps[i], &instr->loc))) + return false; + hlsl_block_add_instr(block, sincos); + + if 
(!hlsl_new_store_component(ctx, &store_block, &var_deref, i, sincos)) + return false; + hlsl_block_add_block(block, &store_block); + } + + if (!(var_load = hlsl_new_load_index(ctx, &var_deref, NULL, &instr->loc))) + return false; + hlsl_block_add_instr(block, &var_load->node); + } + + return true; +} + static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; @@ -2936,7 +3261,7 @@ static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, st float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); /* If this is happens, it means we failed to cast the argument to boolean somewhere. */ - assert(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL); if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &arg->loc))) return false; @@ -2992,7 +3317,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return false; } - assert(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL); type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT, instr->data_type->dimx, instr->data_type->dimy); @@ -3290,7 +3615,7 @@ static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return false; /* Narrowing casts should have already been lowered. 
*/ - assert(type->dimx == arg_type->dimx); + VKD3D_ASSERT(type->dimx == arg_type->dimx); zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc); if (!zero) @@ -3312,7 +3637,7 @@ struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_bloc struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; struct hlsl_ir_node *cond; - assert(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); + VKD3D_ASSERT(hlsl_types_are_equal(if_true->data_type, if_false->data_type)); if (cond_type->e.numeric.type != HLSL_TYPE_BOOL) { @@ -3511,7 +3836,7 @@ static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru { arg1 = expr->operands[0].node; arg2 = expr->operands[1].node; - assert(arg1->data_type->dimx == arg2->data_type->dimx); + VKD3D_ASSERT(arg1->data_type->dimx == arg2->data_type->dimx); dimx = arg1->data_type->dimx; is_bool = type->e.numeric.type == HLSL_TYPE_BOOL; @@ -3729,6 +4054,7 @@ static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) case HLSL_IR_INDEX: case HLSL_IR_LOAD: case HLSL_IR_RESOURCE_LOAD: + case HLSL_IR_STRING_CONSTANT: case HLSL_IR_SWIZZLE: if (list_empty(&instr->uses)) { @@ -3786,8 +4112,8 @@ static bool mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_deref *deref, if (!deref->rel_offset.node) return false; - assert(deref->var); - assert(deref->rel_offset.node->type != HLSL_IR_CONSTANT); + VKD3D_ASSERT(deref->var); + VKD3D_ASSERT(deref->rel_offset.node->type != HLSL_IR_CONSTANT); deref->var->indexable = true; return true; @@ -3815,15 +4141,16 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { + const struct hlsl_reg_reservation *reservation = &var->reg_reservation; unsigned int r; - if (var->reg_reservation.reg_type) + if (reservation->reg_type) { for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r) { if (var->regs[r].allocation_size > 0) { - if (var->reg_reservation.reg_type != 
get_regset_name(r)) + if (reservation->reg_type != get_regset_name(r)) { struct vkd3d_string_buffer *type_string; @@ -3839,10 +4166,8 @@ static void allocate_register_reservations(struct hlsl_ctx *ctx) else { var->regs[r].allocated = true; - var->regs[r].id = var->reg_reservation.reg_index; - TRACE("Allocated reserved %s to %c%u-%c%u.\n", var->name, var->reg_reservation.reg_type, - var->reg_reservation.reg_index, var->reg_reservation.reg_type, - var->reg_reservation.reg_index + var->regs[r].allocation_size); + var->regs[r].space = reservation->reg_space; + var->regs[r].index = reservation->reg_index; } } } @@ -4010,6 +4335,7 @@ static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop break; } case HLSL_IR_CONSTANT: + case HLSL_IR_STRING_CONSTANT: break; } } @@ -4111,7 +4437,7 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a unsigned int writemask; uint32_t reg_idx; - assert(component_count <= reg_size); + VKD3D_ASSERT(component_count <= reg_size); for (reg_idx = 0;; ++reg_idx) { @@ -4133,6 +4459,30 @@ static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_a return ret; } +/* Allocate a register with writemask, while reserving reg_writemask. 
*/ +static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx, struct register_allocator *allocator, + unsigned int first_write, unsigned int last_read, uint32_t reg_writemask, uint32_t writemask) +{ + struct hlsl_reg ret = {0}; + uint32_t reg_idx; + + VKD3D_ASSERT((reg_writemask & writemask) == writemask); + + for (reg_idx = 0;; ++reg_idx) + { + if ((get_available_writemask(allocator, first_write, last_read, reg_idx) & reg_writemask) == reg_writemask) + break; + } + + record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read); + + ret.id = reg_idx; + ret.allocation_size = 1; + ret.writemask = writemask; + ret.allocated = true; + return ret; +} + static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write, unsigned int last_read, uint32_t reg_idx, unsigned int reg_size) { @@ -4181,8 +4531,10 @@ static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, { unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC]; + /* FIXME: We could potentially pack structs or arrays more efficiently... 
*/ + if (type->class <= HLSL_CLASS_VECTOR) - return allocate_register(ctx, allocator, first_write, last_read, reg_size, type->dimx); + return allocate_register(ctx, allocator, first_write, last_read, type->dimx, type->dimx); else return allocate_range(ctx, allocator, first_write, last_read, reg_size); } @@ -4224,7 +4576,7 @@ static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hls { enum hlsl_sampler_dim dim; - assert(!load->sampler.var); + VKD3D_ASSERT(!load->sampler.var); dim = var->objects_usage[regset][index].sampler_dim; if (dim != load->sampling_dim) @@ -4334,6 +4686,44 @@ static void calculate_resource_register_counts(struct hlsl_ctx *ctx) } } +static void allocate_instr_temp_register(struct hlsl_ctx *ctx, + struct hlsl_ir_node *instr, struct register_allocator *allocator) +{ + unsigned int reg_writemask = 0, dst_writemask = 0; + + if (instr->reg.allocated || !instr->last_read) + return; + + if (instr->type == HLSL_IR_EXPR) + { + switch (hlsl_ir_expr(instr)->op) + { + case HLSL_OP1_COS_REDUCED: + dst_writemask = VKD3DSP_WRITEMASK_0; + reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_0; + break; + + case HLSL_OP1_SIN_REDUCED: + dst_writemask = VKD3DSP_WRITEMASK_1; + reg_writemask = ctx->profile->major_version < 3 ? 
(1 << 3) - 1 : VKD3DSP_WRITEMASK_1; + break; + + default: + break; + } + } + + if (reg_writemask) + instr->reg = allocate_register_with_masks(ctx, allocator, + instr->index, instr->last_read, reg_writemask, dst_writemask); + else + instr->reg = allocate_numeric_registers_for_type(ctx, allocator, + instr->index, instr->last_read, instr->data_type); + + TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, + debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); +} + static void allocate_variable_temp_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, struct register_allocator *allocator) { @@ -4373,13 +4763,7 @@ static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx, if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT) continue; - if (!instr->reg.allocated && instr->last_read) - { - instr->reg = allocate_numeric_registers_for_type(ctx, allocator, instr->index, instr->last_read, - instr->data_type); - TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index, - debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read); - } + allocate_instr_temp_register(ctx, instr, allocator); switch (instr->type) { @@ -4474,9 +4858,9 @@ static void allocate_const_registers_recurse(struct hlsl_ctx *ctx, constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type)); - assert(hlsl_is_numeric_type(type)); - assert(type->dimy == 1); - assert(constant->reg.writemask); + VKD3D_ASSERT(hlsl_is_numeric_type(type)); + VKD3D_ASSERT(type->dimy == 1); + VKD3D_ASSERT(constant->reg.writemask); for (x = 0, i = 0; x < 4; ++x) { @@ -4587,8 +4971,46 @@ static void sort_uniforms_by_numeric_bind_count(struct hlsl_ctx *ctx) list_move_tail(&ctx->extern_vars, &sorted); } +/* In SM2, 'sincos' expects specific constants as src1 and src2 arguments. 
+ * These have to be referenced directly, i.e. as 'c' not 'r'. */ +static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct register_allocator *allocator) +{ + const struct hlsl_ir_node *instr; + struct hlsl_type *type; + + if (ctx->profile->major_version >= 3) + return; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED + || hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED)) + { + type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4); + + ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); + TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type)); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2, 2.60416674e-03f); + record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3, 2.60416680e-04f); + + ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type); + TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type)); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2, 1.00000000e+00f); + record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3, 5.00000000e-01f); + + return; + } + } +} + static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func) { + struct register_allocator allocator_used = {0}; struct register_allocator allocator = {0}; struct hlsl_ir_var *var; @@ -4597,6 +5019,7 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, 
extern_entry) { unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC]; if (!var->is_uniform || reg_size == 0) continue; @@ -4606,15 +5029,18 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi unsigned int reg_idx = var->reg_reservation.reg_index; unsigned int i; - assert(reg_size % 4 == 0); + VKD3D_ASSERT(reg_size % 4 == 0); for (i = 0; i < reg_size / 4; ++i) { - if (get_available_writemask(&allocator, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) + if (i < bind_count) { - hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, - "Overlapping register() reservations on 'c%u'.", reg_idx + i); + if (get_available_writemask(&allocator_used, 1, UINT_MAX, reg_idx + i) != VKD3DSP_WRITEMASK_ALL) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Overlapping register() reservations on 'c%u'.", reg_idx + i); + } + record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); } - record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX); } @@ -4627,6 +5053,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi } } + vkd3d_free(allocator_used.allocations); + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC]; @@ -4644,6 +5072,8 @@ static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_functi allocate_const_registers_recurse(ctx, &entry_func->body, &allocator); + allocate_sincos_const_registers(ctx, &entry_func->body, &allocator); + vkd3d_free(allocator.allocations); } @@ -4693,11 +5123,11 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var uint32_t reg; bool builtin; - assert(var->semantic.name); + VKD3D_ASSERT(var->semantic.name); if (ctx->profile->major_version < 4) { - 
D3DSHADER_PARAM_REGISTER_TYPE sm1_type; + struct vkd3d_shader_version version; D3DDECLUSAGE usage; uint32_t usage_idx; @@ -4705,8 +5135,12 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if (ctx->profile->major_version == 1 && output && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) return; - builtin = hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &sm1_type, ®); - if (!builtin && !hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx)) + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + builtin = hlsl_sm1_register_from_semantic(&version, + var->semantic.name, var->semantic.index, output, &type, ®); + if (!builtin && !hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx)) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Invalid semantic '%s'.", var->semantic.name); @@ -4715,7 +5149,6 @@ static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var if ((!output && !var->last_read) || (output && !var->first_write)) return; - type = (enum vkd3d_shader_register_type)sm1_type; } else { @@ -4762,13 +5195,14 @@ static void allocate_semantic_registers(struct hlsl_ctx *ctx) } } -static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t index) +static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx, uint32_t space, uint32_t index) { const struct hlsl_buffer *buffer; LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry) { - if (buffer->used_size && buffer->reservation.reg_type == 'b' && buffer->reservation.reg_index == index) + if (buffer->reservation.reg_type == 'b' + && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index) return buffer; } return NULL; @@ -4783,6 +5217,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va if 
(register_reservation) { var->buffer_offset = 4 * var->reg_reservation.reg_index; + var->has_explicit_bind_point = 1; } else { @@ -4815,6 +5250,7 @@ static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_va } } var->buffer_offset = var->reg_reservation.offset_index; + var->has_explicit_bind_point = 1; } else { @@ -4913,11 +5349,19 @@ void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx) } } +static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx) +{ + if (hlsl_version_ge(ctx, 5, 1)) + return UINT_MAX; + + return 13; +} + static void allocate_buffers(struct hlsl_ctx *ctx) { struct hlsl_buffer *buffer; + uint32_t index = 0, id = 0; struct hlsl_ir_var *var; - uint32_t index = 0; LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) { @@ -4938,32 +5382,59 @@ static void allocate_buffers(struct hlsl_ctx *ctx) if (buffer->type == HLSL_BUFFER_CONSTANT) { - if (buffer->reservation.reg_type == 'b') + const struct hlsl_reg_reservation *reservation = &buffer->reservation; + + if (reservation->reg_type == 'b') { - const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, buffer->reservation.reg_index); + const struct hlsl_buffer *reserved_buffer = get_reserved_buffer(ctx, + reservation->reg_space, reservation->reg_index); + unsigned int max_index = get_max_cbuffer_reg_index(ctx); + + if (buffer->reservation.reg_index > max_index) + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Buffer reservation cb%u exceeds target's maximum (cb%u).", + buffer->reservation.reg_index, max_index); if (reserved_buffer && reserved_buffer != buffer) { hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple buffers bound to cb%u.", buffer->reservation.reg_index); + "Multiple buffers bound to space %u, index %u.", + reservation->reg_space, reservation->reg_index); hlsl_note(ctx, &reserved_buffer->loc, VKD3D_SHADER_LOG_ERROR, - "Buffer %s is already bound to 
cb%u.", reserved_buffer->name, buffer->reservation.reg_index); + "Buffer %s is already bound to space %u, index %u.", + reserved_buffer->name, reservation->reg_space, reservation->reg_index); } - buffer->reg.id = buffer->reservation.reg_index; + buffer->reg.space = reservation->reg_space; + buffer->reg.index = reservation->reg_index; + if (hlsl_version_ge(ctx, 5, 1)) + buffer->reg.id = id++; + else + buffer->reg.id = buffer->reg.index; buffer->reg.allocation_size = 1; buffer->reg.allocated = true; - TRACE("Allocated reserved %s to cb%u.\n", buffer->name, index); + TRACE("Allocated reserved %s to space %u, index %u, id %u.\n", + buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id); } - else if (!buffer->reservation.reg_type) + else if (!reservation->reg_type) { - while (get_reserved_buffer(ctx, index)) + unsigned int max_index = get_max_cbuffer_reg_index(ctx); + while (get_reserved_buffer(ctx, 0, index)) ++index; - buffer->reg.id = index; + if (index > max_index) + hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION, + "Too many buffers allocated, target's maximum is %u.", max_index); + + buffer->reg.space = 0; + buffer->reg.index = index; + if (hlsl_version_ge(ctx, 5, 1)) + buffer->reg.id = id++; + else + buffer->reg.id = buffer->reg.index; buffer->reg.allocation_size = 1; buffer->reg.allocated = true; - TRACE("Allocated %s to cb%u.\n", buffer->name, index); + TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id); ++index; } else @@ -4980,7 +5451,7 @@ static void allocate_buffers(struct hlsl_ctx *ctx) } static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset, - uint32_t index, bool allocated_only) + uint32_t space, uint32_t index, bool allocated_only) { const struct hlsl_ir_var *var; unsigned int start, count; @@ -4995,12 +5466,18 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum start = 
var->reg_reservation.reg_index; count = var->data_type->reg_size[regset]; + if (var->reg_reservation.reg_space != space) + continue; + if (!var->regs[regset].allocated && allocated_only) continue; } else if (var->regs[regset].allocated) { - start = var->regs[regset].id; + if (var->regs[regset].space != space) + continue; + + start = var->regs[regset].index; count = var->regs[regset].allocation_size; } else @@ -5017,8 +5494,8 @@ static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) { char regset_name = get_regset_name(regset); + uint32_t min_index = 0, id = 0; struct hlsl_ir_var *var; - uint32_t min_index = 0; if (regset == HLSL_REGSET_UAVS) { @@ -5041,35 +5518,44 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) if (var->regs[regset].allocated) { const struct hlsl_ir_var *reserved_object, *last_reported = NULL; - unsigned int index, i; + unsigned int i; - if (var->regs[regset].id < min_index) + if (var->regs[regset].index < min_index) { - assert(regset == HLSL_REGSET_UAVS); + VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, "UAV index (%u) must be higher than the maximum render target index (%u).", - var->regs[regset].id, min_index - 1); + var->regs[regset].index, min_index - 1); continue; } for (i = 0; i < count; ++i) { - index = var->regs[regset].id + i; + unsigned int space = var->regs[regset].space; + unsigned int index = var->regs[regset].index + i; /* get_allocated_object() may return "var" itself, but we * actually want that, otherwise we'll end up reporting the * same conflict between the same two variables twice. 
*/ - reserved_object = get_allocated_object(ctx, regset, index, true); + reserved_object = get_allocated_object(ctx, regset, space, index, true); if (reserved_object && reserved_object != var && reserved_object != last_reported) { hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS, - "Multiple variables bound to %c%u.", regset_name, index); + "Multiple variables bound to space %u, %c%u.", regset_name, space, index); hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR, - "Variable '%s' is already bound to %c%u.", reserved_object->name, - regset_name, index); + "Variable '%s' is already bound to space %u, %c%u.", + reserved_object->name, regset_name, space, index); last_reported = reserved_object; } } + + if (hlsl_version_ge(ctx, 5, 1)) + var->regs[regset].id = id++; + else + var->regs[regset].id = var->regs[regset].index; + TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n", + var->name, var->regs[regset].space, regset_name, var->regs[regset].index, + regset_name, var->regs[regset].index + count, var->regs[regset].id); } else { @@ -5078,7 +5564,7 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) while (available < count) { - if (get_allocated_object(ctx, regset, index, false)) + if (get_allocated_object(ctx, regset, 0, index, false)) available = 0; else ++available; @@ -5086,10 +5572,15 @@ static void allocate_objects(struct hlsl_ctx *ctx, enum hlsl_regset regset) } index -= count; - var->regs[regset].id = index; + var->regs[regset].space = 0; + var->regs[regset].index = index; + if (hlsl_version_ge(ctx, 5, 1)) + var->regs[regset].id = id++; + else + var->regs[regset].id = var->regs[regset].index; var->regs[regset].allocated = true; - TRACE("Allocated variable %s to %c%u-%c%u.\n", var->name, regset_name, index, regset_name, - index + count); + TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name, + regset_name, index, regset_name, index + count, 
var->regs[regset].id); ++index; } } @@ -5109,12 +5600,12 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl struct hlsl_ir_node *path_node = deref->path[i].node; unsigned int idx = 0; - assert(path_node); + VKD3D_ASSERT(path_node); if (path_node->type != HLSL_IR_CONSTANT) return false; /* We should always have generated a cast to UINT. */ - assert(path_node->data_type->class == HLSL_CLASS_SCALAR + VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); idx = hlsl_ir_constant(path_node)->value.u[0].u; @@ -5123,21 +5614,13 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl { case HLSL_CLASS_VECTOR: if (idx >= type->dimx) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Vector index is out of bounds. %u/%u", idx, type->dimx); return false; - } *start += idx; break; case HLSL_CLASS_MATRIX: if (idx >= hlsl_type_major_size(type)) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type)); return false; - } if (hlsl_type_is_row_major(type)) *start += idx * type->dimx; else @@ -5146,11 +5629,7 @@ bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hl case HLSL_CLASS_ARRAY: if (idx >= type->e.array.elements_count) - { - hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, - "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count); return false; - } *start += idx * hlsl_type_component_count(type->e.array.type); break; @@ -5186,11 +5665,11 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref struct hlsl_ir_node *path_node = deref->path[i].node; unsigned int idx = 0; - assert(path_node); + VKD3D_ASSERT(path_node); if (path_node->type == HLSL_IR_CONSTANT) { /* We should always have generated a cast to UINT. 
*/ - assert(path_node->data_type->class == HLSL_CLASS_SCALAR + VKD3D_ASSERT(path_node->data_type->class == HLSL_CLASS_SCALAR && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT); idx = hlsl_ir_constant(path_node)->value.u[0].u; @@ -5240,8 +5719,8 @@ bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref type = hlsl_get_element_type_from_path_index(ctx, type, path_node); } - assert(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); - assert(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); + VKD3D_ASSERT(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1)); + VKD3D_ASSERT(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4); return index_is_constant; } @@ -5256,16 +5735,17 @@ bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref if (offset_node) { /* We should always have generated a cast to UINT. */ - assert(offset_node->data_type->class == HLSL_CLASS_SCALAR + VKD3D_ASSERT(offset_node->data_type->class == HLSL_CLASS_SCALAR && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT); - assert(offset_node->type != HLSL_IR_CONSTANT); + VKD3D_ASSERT(offset_node->type != HLSL_IR_CONSTANT); return false; } size = deref->var->data_type->reg_size[regset]; if (*offset >= size) { - hlsl_error(ctx, &offset_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, + /* FIXME: Report a more specific location for the constant deref. */ + hlsl_error(ctx, &deref->var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS, "Dereference is out of bounds. 
%u/%u", *offset, size); return false; } @@ -5280,8 +5760,9 @@ unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl if (hlsl_offset_from_deref(ctx, deref, &offset)) return offset; - hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", - hlsl_node_type_to_string(deref->rel_offset.node->type)); + if (deref->rel_offset.node) + hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.", + hlsl_node_type_to_string(deref->rel_offset.node->type)); return 0; } @@ -5292,9 +5773,10 @@ struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_dere struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC]; unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref); - assert(deref->data_type); - assert(hlsl_is_numeric_type(deref->data_type)); + VKD3D_ASSERT(deref->data_type); + VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type)); + ret.index += offset / 4; ret.id += offset / 4; ret.writemask = 0xf & (0xf << (offset % 4)); @@ -5446,6 +5928,414 @@ void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body) } while (progress); } +static void sm1_generate_vsir_signature_entry(struct hlsl_ctx *ctx, + struct vsir_program *program, bool output, struct hlsl_ir_var *var) +{ + enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE; + enum vkd3d_shader_register_type type; + struct shader_signature *signature; + struct signature_element *element; + unsigned int register_index, mask; + + if ((!output && !var->last_read) || (output && !var->first_write)) + return; + + if (output) + signature = &program->output_signature; + else + signature = &program->input_signature; + + if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity, + signature->element_count + 1, sizeof(*signature->elements))) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + element = &signature->elements[signature->element_count++]; + + 
if (!hlsl_sm1_register_from_semantic(&program->shader_version, + var->semantic.name, var->semantic.index, output, &type, ®ister_index)) + { + unsigned int usage, usage_idx; + bool ret; + + register_index = var->regs[HLSL_REGSET_NUMERIC].id; + + ret = hlsl_sm1_usage_from_semantic(var->semantic.name, var->semantic.index, &usage, &usage_idx); + VKD3D_ASSERT(ret); + /* With the exception of vertex POSITION output, none of these are + * system values. Pixel POSITION input is not equivalent to + * SV_Position; the closer equivalent is VPOS, which is not declared + * as a semantic. */ + if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX + && output && usage == VKD3D_DECL_USAGE_POSITION) + sysval = VKD3D_SHADER_SV_POSITION; + } + mask = (1 << var->data_type->dimx) - 1; + + memset(element, 0, sizeof(*element)); + if (!(element->semantic_name = vkd3d_strdup(var->semantic.name))) + { + --signature->element_count; + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + element->semantic_index = var->semantic.index; + element->sysval_semantic = sysval; + element->component_type = VKD3D_SHADER_COMPONENT_FLOAT; + element->register_index = register_index; + element->target_location = register_index; + element->register_count = 1; + element->mask = mask; + element->used_mask = mask; + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output) + element->interpolation_mode = VKD3DSIM_LINEAR; +} + +static void sm1_generate_vsir_signature(struct hlsl_ctx *ctx, struct vsir_program *program) +{ + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + if (var->is_input_semantic) + sm1_generate_vsir_signature_entry(ctx, program, false, var); + if (var->is_output_semantic) + sm1_generate_vsir_signature_entry(ctx, program, true, var); + } +} + +/* OBJECTIVE: Translate all the information from ctx and entry_func to the + * vsir_program and ctab blob, so they can be used as input to d3dbc_compile() + * without 
relying on ctx and entry_func. */ +static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, + uint64_t config_flags, struct vsir_program *program, struct vkd3d_shader_code *ctab) +{ + struct vkd3d_shader_version version = {0}; + struct vkd3d_bytecode_buffer buffer = {0}; + + version.major = ctx->profile->major_version; + version.minor = ctx->profile->minor_version; + version.type = ctx->profile->type; + if (!vsir_program_init(program, NULL, &version, 0)) + { + ctx->result = VKD3D_ERROR_OUT_OF_MEMORY; + return; + } + + write_sm1_uniforms(ctx, &buffer); + if (buffer.status) + { + vkd3d_free(buffer.data); + ctx->result = buffer.status; + return; + } + ctab->code = buffer.data; + ctab->size = buffer.size; + + sm1_generate_vsir_signature(ctx, program); +} + +static struct hlsl_ir_jump *loop_unrolling_find_jump(struct hlsl_block *block, struct hlsl_ir_node *stop_point, + struct hlsl_block **found_block) +{ + struct hlsl_ir_node *node; + + LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry) + { + if (node == stop_point) + return NULL; + + if (node->type == HLSL_IR_IF) + { + struct hlsl_ir_if *iff = hlsl_ir_if(node); + struct hlsl_ir_jump *jump = NULL; + + if ((jump = loop_unrolling_find_jump(&iff->then_block, stop_point, found_block))) + return jump; + if ((jump = loop_unrolling_find_jump(&iff->else_block, stop_point, found_block))) + return jump; + } + else if (node->type == HLSL_IR_JUMP) + { + struct hlsl_ir_jump *jump = hlsl_ir_jump(node); + + if (jump->type == HLSL_IR_JUMP_BREAK || jump->type == HLSL_IR_JUMP_CONTINUE) + { + *found_block = block; + return jump; + } + } + } + + return NULL; +} + +static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop) +{ + /* Always use the explicit limit if it has been passed. */ + if (loop->unroll_limit) + return loop->unroll_limit; + + /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. 
*/ + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + return 1024; + + /* SM4 limits implicit unrolling to 254 iterations. */ + if (hlsl_version_ge(ctx, 4, 0)) + return 254; + + /* SM<3 implicitly unrolls up to 1024 iterations. */ + return 1024; +} + +static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_block *loop_parent, struct hlsl_ir_loop *loop) +{ + unsigned int max_iterations, i; + + max_iterations = loop_unrolling_get_max_iterations(ctx, loop); + + for (i = 0; i < max_iterations; ++i) + { + struct hlsl_block tmp_dst, *jump_block; + struct hlsl_ir_jump *jump = NULL; + + if (!hlsl_clone_block(ctx, &tmp_dst, &loop->body)) + return false; + list_move_before(&loop->node.entry, &tmp_dst.instrs); + hlsl_block_cleanup(&tmp_dst); + + hlsl_run_const_passes(ctx, block); + + if ((jump = loop_unrolling_find_jump(loop_parent, &loop->node, &jump_block))) + { + enum hlsl_ir_jump_type type = jump->type; + + if (jump_block != loop_parent) + { + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + hlsl_error(ctx, &jump->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, + "Unable to unroll loop, unrolling loops with conditional jumps is currently not supported."); + return false; + } + + list_move_slice_tail(&tmp_dst.instrs, &jump->node.entry, list_prev(&loop_parent->instrs, &loop->node.entry)); + hlsl_block_cleanup(&tmp_dst); + + if (type == HLSL_IR_JUMP_BREAK) + break; + } + } + + /* Native will not emit an error if max_iterations has been reached with an + * explicit limit. 
It also will not insert a loop if there are iterations left + * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */ + if (!loop->unroll_limit && i == max_iterations) + { + if (loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL, + "Unable to unroll loop, maximum iterations reached (%u).", max_iterations); + return false; + } + + list_remove(&loop->node.entry); + hlsl_free_instr(&loop->node); + + return true; +} + +/* + * loop_unrolling_find_unrollable_loop() is not the normal way to do things; + * normal passes simply iterate over the whole block and apply a transformation + * to every relevant instruction. However, loop unrolling can fail, and we want + * to leave the loop in its previous state in that case. That isn't a problem by + * itself, except that loop unrolling needs copy-prop in order to work properly, + * and copy-prop state at the time of the loop depends on the rest of the program + * up to that point. This means we need to clone the whole program, and at that + * point we have to search it again anyway to find the clone of the loop we were + * going to unroll. + * + * FIXME: Ideally we wouldn't clone the whole program; instead we would run copyprop + * up until the loop instruction, clone just that loop, then use copyprop again + * with the saved state after unrolling. However, copyprop currently isn't built + * for that yet [notably, it still relies on indices]. Note also this still doesn't + * really let us use transform_ir() anyway [since we don't have a good way to say + * "copyprop from the beginning of the program up to the instruction we're + * currently processing" from the callback]; we'd have to use a dedicated + * recursive function instead. 
*/ +static struct hlsl_ir_loop *loop_unrolling_find_unrollable_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, + struct hlsl_block **containing_block) +{ + struct hlsl_ir_node *instr; + + LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry) + { + switch (instr->type) + { + case HLSL_IR_LOOP: + { + struct hlsl_ir_loop *nested_loop; + struct hlsl_ir_loop *loop = hlsl_ir_loop(instr); + + if ((nested_loop = loop_unrolling_find_unrollable_loop(ctx, &loop->body, containing_block))) + return nested_loop; + + if (loop->unroll_type == HLSL_IR_LOOP_UNROLL || loop->unroll_type == HLSL_IR_LOOP_FORCE_UNROLL) + { + *containing_block = block; + return loop; + } + + break; + } + case HLSL_IR_IF: + { + struct hlsl_ir_loop *loop; + struct hlsl_ir_if *iff = hlsl_ir_if(instr); + + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->then_block, containing_block))) + return loop; + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &iff->else_block, containing_block))) + return loop; + + break; + } + case HLSL_IR_SWITCH: + { + struct hlsl_ir_switch *s = hlsl_ir_switch(instr); + struct hlsl_ir_switch_case *c; + struct hlsl_ir_loop *loop; + + LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry) + { + if ((loop = loop_unrolling_find_unrollable_loop(ctx, &c->body, containing_block))) + return loop; + } + + break; + } + default: + break; + } + } + + return NULL; +} + +static void transform_unroll_loops(struct hlsl_ctx *ctx, struct hlsl_block *block) +{ + while (true) + { + struct hlsl_block clone, *containing_block; + struct hlsl_ir_loop *loop, *cloned_loop; + + if (!(loop = loop_unrolling_find_unrollable_loop(ctx, block, &containing_block))) + return; + + if (!hlsl_clone_block(ctx, &clone, block)) + return; + + cloned_loop = loop_unrolling_find_unrollable_loop(ctx, &clone, &containing_block); + VKD3D_ASSERT(cloned_loop); + + if (!loop_unrolling_unroll_loop(ctx, &clone, containing_block, cloned_loop)) + { + hlsl_block_cleanup(&clone); + 
loop->unroll_type = HLSL_IR_LOOP_FORCE_LOOP; + continue; + } + + hlsl_block_cleanup(block); + hlsl_block_init(block); + hlsl_block_add_block(block, &clone); + } +} + +static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block) +{ + struct hlsl_ir_node *call, *rhs, *store; + struct hlsl_ir_function_decl *func; + unsigned int component_count; + struct hlsl_ir_load *load; + struct hlsl_ir_expr *expr; + struct hlsl_ir_var *lhs; + char *body; + + static const char template[] = + "typedef uint%u uintX;\n" + "float%u soft_f16tof32(uintX x)\n" + "{\n" + " uintX mantissa = x & 0x3ff;\n" + " uintX high2 = mantissa >> 8;\n" + " uintX high2_check = high2 ? high2 : mantissa;\n" + " uintX high6 = high2_check >> 4;\n" + " uintX high6_check = high6 ? high6 : high2_check;\n" + "\n" + " uintX high8 = high6_check >> 2;\n" + " uintX high8_check = (high8 ? high8 : high6_check) >> 1;\n" + " uintX shift = high6 ? (high2 ? 12 : 4) : (high2 ? 8 : 0);\n" + " shift = high8 ? shift + 2 : shift;\n" + " shift = high8_check ? shift + 1 : shift;\n" + " shift = -shift + 10;\n" + " shift = mantissa ? shift : 11;\n" + " uintX subnormal_mantissa = ((mantissa << shift) << 23) & 0x7fe000;\n" + " uintX subnormal_exp = -(shift << 23) + 0x38800000;\n" + " uintX subnormal_val = subnormal_exp + subnormal_mantissa;\n" + " uintX subnormal_or_zero = mantissa ? subnormal_val : 0;\n" + "\n" + " uintX exponent = (((x >> 10) << 23) & 0xf800000) + 0x38000000;\n" + "\n" + " uintX low_3 = (x << 13) & 0x7fe000;\n" + " uintX normalized_val = exponent + low_3;\n" + " uintX inf_nan_val = low_3 + 0x7f800000;\n" + "\n" + " uintX exp_mask = 0x7c00;\n" + " uintX is_inf_nan = (x & exp_mask) == exp_mask;\n" + " uintX is_normalized = x & exp_mask;\n" + "\n" + " uintX check = is_inf_nan ? inf_nan_val : normalized_val;\n" + " uintX exp_mantissa = (is_normalized ? 
check : subnormal_or_zero) & 0x7fffe000;\n" + " uintX sign_bit = (x << 16) & 0x80000000;\n" + "\n" + " return asfloat(exp_mantissa + sign_bit);\n" + "}\n"; + + + if (node->type != HLSL_IR_EXPR) + return false; + + expr = hlsl_ir_expr(node); + + if (expr->op != HLSL_OP1_F16TOF32) + return false; + + rhs = expr->operands[0].node; + component_count = hlsl_type_component_count(rhs->data_type); + + if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count))) + return false; + + if (!(func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body))) + return false; + + lhs = func->parameters.vars[0]; + + if (!(store = hlsl_new_simple_store(ctx, lhs, rhs))) + return false; + hlsl_block_add_instr(block, store); + + if (!(call = hlsl_new_call(ctx, func, &node->loc))) + return false; + hlsl_block_add_instr(block, call); + + if (!(load = hlsl_new_var_load(ctx, func->return_var, &node->loc))) + return false; + hlsl_block_add_instr(block, &load->node); + + return true; +} + int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out) { @@ -5466,6 +6356,9 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry if (ctx->result) return ctx->result; + if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0)) + lower_ir(ctx, lower_f16tof32, body); + lower_return(ctx, entry_func, body, false); while (hlsl_transform_ir(ctx, lower_calls, body, NULL)); @@ -5532,6 +6425,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry hlsl_transform_ir(ctx, lower_discard_neg, body, NULL); } + transform_unroll_loops(ctx, body); hlsl_run_const_passes(ctx, body); remove_unreachable_code(ctx, body); @@ -5541,7 +6435,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); - hlsl_transform_ir(ctx, 
validate_static_object_references, body, NULL); + hlsl_transform_ir(ctx, validate_dereferences, body, NULL); hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL); if (profile->major_version >= 4) hlsl_transform_ir(ctx, lower_combined_samples, body, NULL); @@ -5555,6 +6449,8 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry if (profile->major_version < 4) { + while (lower_ir(ctx, lower_nonconstant_array_loads, body)); + lower_ir(ctx, lower_ternary, body); lower_ir(ctx, lower_nonfloat_exprs, body); @@ -5569,6 +6465,7 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_round, body); lower_ir(ctx, lower_ceil, body); lower_ir(ctx, lower_floor, body); + lower_ir(ctx, lower_trig, body); lower_ir(ctx, lower_comparison_operators, body); lower_ir(ctx, lower_logic_not, body); if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) @@ -5628,7 +6525,25 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry switch (target_type) { case VKD3D_SHADER_TARGET_D3D_BYTECODE: - return hlsl_sm1_write(ctx, entry_func, out); + { + uint32_t config_flags = vkd3d_shader_init_config_flags(); + struct vkd3d_shader_code ctab = {0}; + struct vsir_program program; + int result; + + sm1_generate_vsir(ctx, entry_func, config_flags, &program, &ctab); + if (ctx->result) + { + vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&ctab); + return ctx->result; + } + + result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context, ctx, entry_func); + vsir_program_cleanup(&program); + vkd3d_shader_free_shader_code(&ctab); + return result; + } case VKD3D_SHADER_TARGET_DXBC_TPF: return hlsl_sm4_write(ctx, entry_func, out); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c index 16015fa8a81..db4913b7c62 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c +++ 
b/libs/vkd3d/libs/vkd3d-shader/hlsl_constant_ops.c @@ -28,7 +28,7 @@ static bool fold_abs(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -119,7 +119,7 @@ static bool fold_bit_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -234,7 +234,7 @@ static bool fold_ceil(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -260,7 +260,7 @@ static bool fold_exp2(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -286,7 +286,7 @@ static bool fold_floor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -313,7 +313,7 @@ static bool fold_fract(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, unsigned int k; float i; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -339,7 +339,7 @@ static bool fold_log2(struct hlsl_ctx *ctx, 
struct hlsl_constant_value *dst, con enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -384,7 +384,7 @@ static bool fold_neg(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -418,7 +418,7 @@ static bool fold_not(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -442,7 +442,7 @@ static bool fold_rcp(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -487,7 +487,7 @@ static bool fold_rsq(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -527,7 +527,7 @@ static bool fold_sat(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -553,7 +553,7 @@ static bool fold_sqrt(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con enum hlsl_base_type 
type = dst_type->e.numeric.type; unsigned int k; - assert(type == src->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -598,8 +598,8 @@ static bool fold_add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -635,8 +635,8 @@ static bool fold_and(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -662,8 +662,8 @@ static bool fold_or(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, const enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -689,8 +689,8 @@ static bool fold_bit_xor(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -715,9 +715,9 
@@ static bool fold_dot(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); - assert(src1->node.data_type->dimx == src2->node.data_type->dimx); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); dst->u[0].f = 0.0f; for (k = 0; k < src1->node.data_type->dimx; ++k) @@ -743,11 +743,11 @@ static bool fold_dp2add(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); - assert(type == src3->node.data_type->e.numeric.type); - assert(src1->node.data_type->dimx == src2->node.data_type->dimx); - assert(src3->node.data_type->dimx == 1); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src3->node.data_type->e.numeric.type); + VKD3D_ASSERT(src1->node.data_type->dimx == src2->node.data_type->dimx); + VKD3D_ASSERT(src3->node.data_type->dimx == 1); dst->u[0].f = src3->value.u[0].f; for (k = 0; k < src1->node.data_type->dimx; ++k) @@ -774,8 +774,8 @@ static bool fold_div(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -841,8 +841,8 @@ static bool fold_equal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, co { unsigned 
int k; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -877,8 +877,8 @@ static bool fold_gequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -916,8 +916,8 @@ static bool fold_less(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, con { unsigned int k; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -955,8 +955,8 @@ static bool fold_lshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); - assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); for (k = 0; k < dst_type->dimx; ++k) { @@ -986,8 +986,8 @@ static bool fold_max(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + 
VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1024,8 +1024,8 @@ static bool fold_min(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1063,8 +1063,8 @@ static bool fold_mod(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1105,8 +1105,8 @@ static bool fold_mul(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, cons enum hlsl_base_type type = dst_type->e.numeric.type; unsigned int k; - assert(type == src1->node.data_type->e.numeric.type); - assert(type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1139,8 +1139,8 @@ static bool fold_nequal(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); - assert(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == src2->node.data_type->e.numeric.type); for (k = 0; k < dst_type->dimx; ++k) { @@ -1175,9 +1175,9 @@ static bool 
fold_ternary(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, { unsigned int k; - assert(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); - assert(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); - assert(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == src2->node.data_type->e.numeric.type); + VKD3D_ASSERT(dst_type->e.numeric.type == src3->node.data_type->e.numeric.type); + VKD3D_ASSERT(src1->node.data_type->e.numeric.type == HLSL_TYPE_BOOL); for (k = 0; k < dst_type->dimx; ++k) dst->u[k] = src1->value.u[k].u ? src2->value.u[k] : src3->value.u[k]; @@ -1190,8 +1190,8 @@ static bool fold_rshift(struct hlsl_ctx *ctx, struct hlsl_constant_value *dst, c { unsigned int k; - assert(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); - assert(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); + VKD3D_ASSERT(dst_type->e.numeric.type == src1->node.data_type->e.numeric.type); + VKD3D_ASSERT(src2->node.data_type->e.numeric.type == HLSL_TYPE_INT); for (k = 0; k < dst_type->dimx; ++k) { @@ -1239,7 +1239,7 @@ bool hlsl_fold_constant_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, { if (expr->operands[i].node->type != HLSL_IR_CONSTANT) return false; - assert(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); + VKD3D_ASSERT(expr->operands[i].node->data_type->class <= HLSL_CLASS_VECTOR); } } arg1 = hlsl_ir_constant(expr->operands[0].node); diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index b3b745fc1b2..6dbe30b1553 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -19,9 +19,73 @@ #include "vkd3d_shader_private.h" #include "vkd3d_types.h" -bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve) +static int convert_parameter_info(const struct vkd3d_shader_compile_info *compile_info, + unsigned int *ret_count, const 
struct vkd3d_shader_parameter1 **ret_parameters) +{ + const struct vkd3d_shader_spirv_target_info *spirv_info; + struct vkd3d_shader_parameter1 *parameters; + + *ret_count = 0; + *ret_parameters = NULL; + + if (!(spirv_info = vkd3d_find_struct(compile_info->next, SPIRV_TARGET_INFO)) || !spirv_info->parameter_count) + return VKD3D_OK; + + if (!(parameters = vkd3d_calloc(spirv_info->parameter_count, sizeof(*parameters)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (unsigned int i = 0; i < spirv_info->parameter_count; ++i) + { + const struct vkd3d_shader_parameter *src = &spirv_info->parameters[i]; + struct vkd3d_shader_parameter1 *dst = ¶meters[i]; + + dst->name = src->name; + dst->type = src->type; + dst->data_type = src->data_type; + + if (src->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + dst->u.immediate_constant = src->u.immediate_constant; + } + else if (src->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) + { + dst->u.specialization_constant = src->u.specialization_constant; + } + else + { + ERR("Invalid parameter type %#x.\n", src->type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + } + + *ret_count = spirv_info->parameter_count; + *ret_parameters = parameters; + + return VKD3D_OK; +} + +bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_version *version, unsigned int reserve) { memset(program, 0, sizeof(*program)); + + if (compile_info) + { + const struct vkd3d_shader_parameter_info *parameter_info; + + if ((parameter_info = vkd3d_find_struct(compile_info->next, PARAMETER_INFO))) + { + program->parameter_count = parameter_info->parameter_count; + program->parameters = parameter_info->parameters; + } + else + { + if (convert_parameter_info(compile_info, &program->parameter_count, &program->parameters) < 0) + return false; + program->free_parameters = true; + } + } + program->shader_version = *version; return 
shader_instruction_array_init(&program->instructions, reserve); } @@ -30,6 +94,8 @@ void vsir_program_cleanup(struct vsir_program *program) { size_t i; + if (program->free_parameters) + vkd3d_free((void *)program->parameters); for (i = 0; i < program->block_name_count; ++i) vkd3d_free((void *)program->block_names[i]); vkd3d_free(program->block_names); @@ -39,6 +105,18 @@ void vsir_program_cleanup(struct vsir_program *program) shader_signature_cleanup(&program->patch_constant_signature); } +const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( + const struct vsir_program *program, enum vkd3d_shader_parameter_name name) +{ + for (unsigned int i = 0; i < program->parameter_count; ++i) + { + if (program->parameters[i].name == name) + return &program->parameters[i]; + } + + return NULL; +} + static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) { return reg->type == VKD3DSPR_FORKINSTID || reg->type == VKD3DSPR_JOININSTID; @@ -46,9 +124,9 @@ static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shade static bool vsir_instruction_is_dcl(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return (VKD3DSIH_DCL <= handler_idx && handler_idx <= VKD3DSIH_DCL_VERTICES_OUT) - || handler_idx == VKD3DSIH_HS_DECLS; + enum vkd3d_shader_opcode opcode = instruction->opcode; + return (VKD3DSIH_DCL <= opcode && opcode <= VKD3DSIH_DCL_VERTICES_OUT) + || opcode == VKD3DSIH_HS_DECLS; } static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *ins) @@ -60,9 +138,9 @@ static void vkd3d_shader_instruction_make_nop(struct vkd3d_shader_instruction *i static bool vsir_instruction_init_with_params(struct vsir_program *program, struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx, unsigned int dst_count, unsigned int src_count) + enum vkd3d_shader_opcode opcode, 
unsigned int dst_count, unsigned int src_count) { - vsir_instruction_init(ins, location, handler_idx); + vsir_instruction_init(ins, location, opcode); ins->dst_count = dst_count; ins->src_count = src_count; @@ -287,7 +365,7 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro mul_ins = &instructions->elements[pos]; add_ins = &instructions->elements[pos + 1]; - mul_ins->handler_idx = VKD3DSIH_MUL; + mul_ins->opcode = VKD3DSIH_MUL; mul_ins->src_count = 2; if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2))) @@ -311,6 +389,58 @@ static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *pro return VKD3D_OK; } +static enum vkd3d_result vsir_program_lower_sm1_sincos(struct vsir_program *program, + struct vkd3d_shader_instruction *sincos) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = sincos - instructions->elements; + struct vkd3d_shader_instruction *ins; + unsigned int s; + + if (sincos->dst_count != 1) + return VKD3D_OK; + + if (!shader_instruction_array_insert_at(instructions, pos + 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + ins = &instructions->elements[pos + 1]; + + if (!(vsir_instruction_init_with_params(program, ins, &sincos->location, VKD3DSIH_SINCOS, 2, 1))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + ins->flags = sincos->flags; + + *ins->src = *sincos->src; + /* Set the source swizzle to replicate the first component. 
*/ + s = vsir_swizzle_get_component(sincos->src->swizzle, 0); + ins->src->swizzle = vkd3d_shader_create_swizzle(s, s, s, s); + + if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_1) + { + ins->dst[0] = *sincos->dst; + ins->dst[0].write_mask = VKD3DSP_WRITEMASK_1; + } + else + { + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); + } + + if (sincos->dst->write_mask & VKD3DSP_WRITEMASK_0) + { + ins->dst[1] = *sincos->dst; + ins->dst[1].write_mask = VKD3DSP_WRITEMASK_0; + } + else + { + vsir_dst_param_init(&ins->dst[1], VKD3DSPR_NULL, VKD3D_DATA_UNUSED, 0); + } + + /* Make the original instruction no-op */ + vkd3d_shader_instruction_make_nop(sincos); + + return VKD3D_OK; +} + static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program, struct vkd3d_shader_message_context *message_context) { @@ -322,7 +452,7 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr { struct vkd3d_shader_instruction *ins = &instructions->elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_IFC: if ((ret = vsir_program_lower_ifc(program, ins, &tmp_idx, message_context)) < 0) @@ -339,11 +469,18 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr return ret; break; + case VKD3DSIH_DCL: case VKD3DSIH_DCL_CONSTANT_BUFFER: + case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_TEMPS: vkd3d_shader_instruction_make_nop(ins); break; + case VKD3DSIH_SINCOS: + if ((ret = vsir_program_lower_sm1_sincos(program, ins)) < 0) + return ret; + break; + default: break; } @@ -492,26 +629,26 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal struct shader_phase_location *loc; bool b; - if (ins->handler_idx == VKD3DSIH_HS_FORK_PHASE || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) + if (ins->opcode == VKD3DSIH_HS_FORK_PHASE || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) { b = flattener_is_in_fork_or_join_phase(normaliser); /* Reset the phase info. 
*/ normaliser->phase_body_idx = ~0u; - normaliser->phase = ins->handler_idx; + normaliser->phase = ins->opcode; normaliser->instance_count = 1; /* Leave the first occurrence and delete the rest. */ if (b) vkd3d_shader_instruction_make_nop(ins); return; } - else if (ins->handler_idx == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT - || ins->handler_idx == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) + else if (ins->opcode == VKD3DSIH_DCL_HS_FORK_PHASE_INSTANCE_COUNT + || ins->opcode == VKD3DSIH_DCL_HS_JOIN_PHASE_INSTANCE_COUNT) { normaliser->instance_count = ins->declaration.count + !ins->declaration.count; vkd3d_shader_instruction_make_nop(ins); return; } - else if (ins->handler_idx == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( + else if (ins->opcode == VKD3DSIH_DCL_INPUT && shader_register_is_phase_instance_id( &ins->declaration.dst.reg)) { vkd3d_shader_instruction_make_nop(ins); @@ -524,7 +661,7 @@ static void flattener_eliminate_phase_related_dcls(struct hull_flattener *normal if (normaliser->phase_body_idx == ~0u) normaliser->phase_body_idx = index; - if (ins->handler_idx == VKD3DSIH_RET) + if (ins->opcode == VKD3DSIH_RET) { normaliser->last_ret_location = ins->location; vkd3d_shader_instruction_make_nop(ins); @@ -666,6 +803,12 @@ static void dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigne dst->write_mask = VKD3DSP_WRITEMASK_0; } +static void src_param_init_temp_float(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1); + src->reg.idx[0].offset = idx; +} + static void src_param_init_temp_uint(struct vkd3d_shader_src_param *src, unsigned int idx) { vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); @@ -678,12 +821,18 @@ static void src_param_init_const_uint(struct vkd3d_shader_src_param *src, uint32 src->reg.u.immconst_u32[0] = value; } +static void src_param_init_parameter(struct vkd3d_shader_src_param *src, uint32_t idx, enum vkd3d_data_type type) +{ + 
vsir_src_param_init(src, VKD3DSPR_PARAMETER, type, 1); + src->reg.idx[0].offset = idx; +} + void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx) + enum vkd3d_shader_opcode opcode) { memset(ins, 0, sizeof(*ins)); ins->location = *location; - ins->handler_idx = handler_idx; + ins->opcode = opcode; } static bool vsir_instruction_init_label(struct vkd3d_shader_instruction *ins, @@ -770,7 +919,7 @@ static void shader_dst_param_normalise_outpointid(struct vkd3d_shader_dst_param if (control_point_normaliser_is_in_control_point_phase(normaliser) && reg->type == VKD3DSPR_OUTPUT) { /* The TPF reader validates idx_count. */ - assert(reg->idx_count == 1); + VKD3D_ASSERT(reg->idx_count == 1); reg->idx[1] = reg->idx[0]; /* The control point id param is implicit here. Avoid later complications by inserting it. */ reg->idx[0].offset = 0; @@ -865,12 +1014,12 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i { ins = &instructions->elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - normaliser.phase = ins->handler_idx; + normaliser.phase = ins->opcode; break; default: if (vsir_instruction_is_dcl(ins)) @@ -888,7 +1037,7 @@ static enum vkd3d_result instruction_array_normalise_hull_shader_control_point_i { ins = &instructions->elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: input_control_point_count = ins->declaration.count; @@ -992,16 +1141,16 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u { unsigned int i, j, r, c, component_idx, component_count; - assert(write_mask <= VKD3DSP_WRITEMASK_ALL); + VKD3D_ASSERT(write_mask <= VKD3DSP_WRITEMASK_ALL); component_idx = vsir_write_mask_get_component_idx(write_mask); component_count = 
vsir_write_mask_component_count(write_mask); - assert(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); + VKD3D_ASSERT(register_idx < MAX_REG_OUTPUT && MAX_REG_OUTPUT - register_idx >= register_count); if (range_map[register_idx][component_idx] > register_count && is_dcl_indexrange) { /* Validated in the TPF reader. */ - assert(range_map[register_idx][component_idx] != UINT8_MAX); + VKD3D_ASSERT(range_map[register_idx][component_idx] != UINT8_MAX); return; } if (range_map[register_idx][component_idx] == register_count) @@ -1021,7 +1170,7 @@ static void range_map_set_register_range(uint8_t range_map[][VKD3D_VEC4_SIZE], u /* A synthetic patch constant range which overlaps an existing range can start upstream of it * for fork/join phase instancing, but ranges declared by dcl_indexrange should not overlap. * The latter is validated in the TPF reader. */ - assert(!range_map[r][c] || !is_dcl_indexrange); + VKD3D_ASSERT(!range_map[r][c] || !is_dcl_indexrange); range_map[r][c] = UINT8_MAX; } } @@ -1224,7 +1373,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map TRACE("Merging %s, reg %u, mask %#x, sysval %#x with %s, mask %#x, sysval %#x.\n", e->semantic_name, e->register_index, e->mask, e->sysval_semantic, f->semantic_name, f->mask, f->sysval_semantic); - assert(!(e->mask & f->mask)); + VKD3D_ASSERT(!(e->mask & f->mask)); e->mask |= f->mask; e->used_mask |= f->used_mask; @@ -1258,7 +1407,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map continue; register_count = range_map_get_register_count(range_map, e->register_index, e->mask); - assert(register_count != UINT8_MAX); + VKD3D_ASSERT(register_count != UINT8_MAX); register_count += !register_count; if (register_count > 1) @@ -1281,7 +1430,7 @@ static bool shader_signature_merge(struct shader_signature *s, uint8_t range_map static unsigned int shader_register_normalise_arrayed_addressing(struct vkd3d_shader_register *reg, 
unsigned int id_idx, unsigned int register_index) { - assert(id_idx < ARRAY_SIZE(reg->idx) - 1); + VKD3D_ASSERT(id_idx < ARRAY_SIZE(reg->idx) - 1); /* For a relative-addressed register index, move the id up a slot to separate it from the address, * because rel_addr can be replaced with a constant offset in some cases. */ @@ -1388,7 +1537,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par if (is_io_dcl) { /* Validated in the TPF reader. */ - assert(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); + VKD3D_ASSERT(element_idx < ARRAY_SIZE(normaliser->input_dcl_params)); if (dcl_params[element_idx]) { @@ -1413,7 +1562,7 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par else { /* The control point id param. */ - assert(reg->idx[0].rel_addr); + VKD3D_ASSERT(reg->idx[0].rel_addr); } id_idx = 1; } @@ -1526,7 +1675,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi struct vkd3d_shader_register *reg; unsigned int i; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL_INPUT: if (normaliser->shader_type == VKD3D_SHADER_TYPE_HULL) @@ -1560,7 +1709,7 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - normaliser->phase = ins->handler_idx; + normaliser->phase = ins->opcode; memset(normaliser->input_dcl_params, 0, sizeof(normaliser->input_dcl_params)); memset(normaliser->output_dcl_params, 0, sizeof(normaliser->output_dcl_params)); memset(normaliser->pc_dcl_params, 0, sizeof(normaliser->pc_dcl_params)); @@ -1576,7 +1725,33 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi } } -static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) +static bool use_flat_interpolation(const struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) 
+{ + static const struct vkd3d_shader_location no_loc; + const struct vkd3d_shader_parameter1 *parameter; + + if (!(parameter = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_FLAT_INTERPOLATION))) + return false; + + if (parameter->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported flat interpolation parameter type %#x.\n", parameter->type); + return false; + } + if (parameter->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid flat interpolation parameter data type %#x.\n", parameter->data_type); + return false; + } + + return parameter->u.immediate_constant.u.u32; +} + +static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) { struct io_normaliser normaliser = {program->instructions}; struct vkd3d_shader_instruction *ins; @@ -1594,7 +1769,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program { ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_DCL_OUTPUT_CONTROL_POINT_COUNT: normaliser.output_control_point_count = ins->declaration.count; @@ -1608,7 +1783,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program /* fall through */ case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - normaliser.phase = ins->handler_idx; + normaliser.phase = ins->opcode; break; default: break; @@ -1626,7 +1801,7 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program normaliser.input_range_map[i][j] = normaliser.output_range_map[i][j]; else if (normaliser.input_range_map[i][j] && !normaliser.output_range_map[i][j]) normaliser.output_range_map[i][j] = normaliser.input_range_map[i][j]; - else 
assert(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); + else VKD3D_ASSERT(normaliser.input_range_map[i][j] == normaliser.output_range_map[i][j]); } } } @@ -1639,6 +1814,18 @@ static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program return VKD3D_ERROR_OUT_OF_MEMORY; } + if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL + && program->shader_version.major < 4 && use_flat_interpolation(program, message_context)) + { + for (i = 0; i < program->input_signature.element_count; ++i) + { + struct signature_element *element = &program->input_signature.elements[i]; + + if (!ascii_strcasecmp(element->semantic_name, "COLOR")) + element->interpolation_mode = VKD3DSIM_CONSTANT; + } + } + normaliser.phase = VKD3DSIH_INVALID; for (i = 0; i < normaliser.instructions.count; ++i) shader_instruction_normalise_io_params(&normaliser.instructions.elements[i], &normaliser); @@ -1740,7 +1927,7 @@ static enum vkd3d_result instruction_array_normalise_flat_constants(struct vsir_ { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - if (ins->handler_idx == VKD3DSIH_DEF || ins->handler_idx == VKD3DSIH_DEFI || ins->handler_idx == VKD3DSIH_DEFB) + if (ins->opcode == VKD3DSIH_DEF || ins->opcode == VKD3DSIH_DEFI || ins->opcode == VKD3DSIH_DEFB) { struct flat_constant_def *def; @@ -1779,7 +1966,7 @@ static void remove_dead_code(struct vsir_program *program) { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_IF: case VKD3DSIH_LOOP: @@ -1799,7 +1986,7 @@ static void remove_dead_code(struct vsir_program *program) { if (depth > 0) { - if (ins->handler_idx != VKD3DSIH_ELSE) + if (ins->opcode != VKD3DSIH_ELSE) --depth; vkd3d_shader_instruction_make_nop(ins); } @@ -1870,14 +2057,14 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr struct vkd3d_shader_instruction *ins = 
&program->instructions.elements[i]; struct vkd3d_shader_src_param *srcs; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_TEX: if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) return VKD3D_ERROR_OUT_OF_MEMORY; memset(srcs, 0, sizeof(*srcs) * 3); - ins->handler_idx = VKD3DSIH_SAMPLE; + ins->opcode = VKD3DSIH_SAMPLE; srcs[0] = ins->src[0]; @@ -1899,13 +2086,42 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr ins->src_count = 3; break; + case VKD3DSIH_TEXLDD: + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 5))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(srcs, 0, sizeof(*srcs) * 5); + + ins->opcode = VKD3DSIH_SAMPLE_GRAD; + + srcs[0] = ins->src[0]; + + srcs[1].reg.type = VKD3DSPR_RESOURCE; + srcs[1].reg.idx[0] = ins->src[1].reg.idx[0]; + srcs[1].reg.idx[1] = ins->src[1].reg.idx[0]; + srcs[1].reg.idx_count = 2; + srcs[1].reg.data_type = VKD3D_DATA_RESOURCE; + srcs[1].reg.dimension = VSIR_DIMENSION_VEC4; + srcs[1].swizzle = VKD3D_SHADER_NO_SWIZZLE; + + srcs[2].reg.type = VKD3DSPR_SAMPLER; + srcs[2].reg.idx[0] = ins->src[1].reg.idx[0]; + srcs[2].reg.idx[1] = ins->src[1].reg.idx[0]; + srcs[2].reg.idx_count = 2; + srcs[2].reg.data_type = VKD3D_DATA_SAMPLER; + + srcs[3] = ins->src[2]; + srcs[4] = ins->src[3]; + + ins->src = srcs; + ins->src_count = 5; + break; + case VKD3DSIH_TEXBEM: case VKD3DSIH_TEXBEML: case VKD3DSIH_TEXCOORD: case VKD3DSIH_TEXDEPTH: case VKD3DSIH_TEXDP3: case VKD3DSIH_TEXDP3TEX: - case VKD3DSIH_TEXLDD: case VKD3DSIH_TEXLDL: case VKD3DSIH_TEXM3x2PAD: case VKD3DSIH_TEXM3x2TEX: @@ -1919,7 +2135,7 @@ static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_pr case VKD3DSIH_TEXREG2RGB: vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " - "Combined sampler instruction %#x.", ins->handler_idx); + "Combined sampler instruction 
%#x.", ins->opcode); return VKD3D_ERROR_NOT_IMPLEMENTED; default: @@ -2030,7 +2246,7 @@ static bool cf_flattener_copy_instruction(struct cf_flattener *flattener, { struct vkd3d_shader_instruction *dst_ins; - if (instruction->handler_idx == VKD3DSIH_NOP) + if (instruction->opcode == VKD3DSIH_NOP) return true; if (!(dst_ins = cf_flattener_require_space(flattener, 1))) @@ -2245,9 +2461,9 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte * phase instruction, and in all other shader types begins with the first label instruction. * Declaring an indexable temp with function scope is not considered a declaration, * because it needs to live inside a function. */ - if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) + if (!after_declarations_section && instruction->opcode != VKD3DSIH_NOP) { - bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP + bool is_function_indexable = instruction->opcode == VKD3DSIH_DCL_INDEXABLE_TEMP && instruction->declaration.indexable_temp.has_function_scope; if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) @@ -2260,14 +2476,14 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte cf_info = flattener->control_flow_depth ? 
&flattener->control_flow_info[flattener->control_flow_depth - 1] : NULL; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: if (!cf_flattener_copy_instruction(flattener, instruction)) return VKD3D_ERROR_OUT_OF_MEMORY; - if (instruction->handler_idx != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) + if (instruction->opcode != VKD3DSIH_HS_CONTROL_POINT_PHASE || !instruction->flags) after_declarations_section = false; break; @@ -2601,7 +2817,7 @@ static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsi static unsigned int label_from_src_param(const struct vkd3d_shader_src_param *param) { - assert(param->reg.type == VKD3DSPR_LABEL); + VKD3D_ASSERT(param->reg.type == VKD3DSPR_LABEL); return param->reg.idx[0].offset; } @@ -2662,7 +2878,7 @@ static enum vkd3d_result lower_switch_to_if_ladder(struct vsir_program *program) struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; unsigned int case_count, j, default_label; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: current_label = label_from_src_param(&ins->src[0]); @@ -2858,7 +3074,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ /* Only phi src/dst SSA values need be converted here. Structurisation may * introduce new cases of undominated SSA use, which will be handled later. 
*/ - if (ins->handler_idx != VKD3DSIH_PHI) + if (ins->opcode != VKD3DSIH_PHI) continue; ++phi_count; @@ -2870,7 +3086,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ unsigned int label; label = label_from_src_param(&ins->src[j + 1]); - assert(label); + VKD3D_ASSERT(label); info = &block_info[label - 1]; @@ -2907,7 +3123,7 @@ static enum vkd3d_result vsir_program_materialise_phi_ssas_to_temps(struct vsir_ for (j = 0; j < ins->src_count; ++j) materialize_ssas_to_temps_process_reg(program, &alloc, &ins->src[j].reg); - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: current_label = label_from_src_param(&ins->src[0]); @@ -3027,7 +3243,7 @@ static enum vkd3d_result vsir_block_init(struct vsir_block *block, unsigned int byte_count = VKD3D_BITMAP_SIZE(block_count) * sizeof(*block->dominates); - assert(label); + VKD3D_ASSERT(label); memset(block, 0, sizeof(*block)); block->label = label; vsir_block_list_init(&block->predecessors); @@ -3311,7 +3527,7 @@ static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_blo struct vsir_block *successor = &cfg->blocks[target - 1]; enum vkd3d_result ret; - assert(successor->label != 0); + VKD3D_ASSERT(successor->label != 0); if ((ret = vsir_block_list_add(&block->successors, successor)) < 0) return ret; @@ -3336,7 +3552,7 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) if (block->label == 0) continue; - switch (block->end->handler_idx) + switch (block->end->opcode) { case VKD3DSIH_RET: shape = "trapezium"; @@ -3478,7 +3694,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; bool finish = false; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_PHI: case VKD3DSIH_SWITCH_MONOLITHIC: @@ -3488,11 +3704,11 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program { unsigned int 
label = label_from_src_param(&instruction->src[0]); - assert(!current_block); - assert(label > 0); - assert(label <= cfg->block_count); + VKD3D_ASSERT(!current_block); + VKD3D_ASSERT(label > 0); + VKD3D_ASSERT(label <= cfg->block_count); current_block = &cfg->blocks[label - 1]; - assert(current_block->label == 0); + VKD3D_ASSERT(current_block->label == 0); if ((ret = vsir_block_init(current_block, label, program->block_count)) < 0) goto fail; current_block->begin = &program->instructions.elements[i + 1]; @@ -3503,7 +3719,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program case VKD3DSIH_BRANCH: case VKD3DSIH_RET: - assert(current_block); + VKD3D_ASSERT(current_block); current_block->end = instruction; current_block = NULL; break; @@ -3511,7 +3727,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - assert(!current_block); + VKD3D_ASSERT(!current_block); finish = true; break; @@ -3533,7 +3749,7 @@ static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program if (block->label == 0) continue; - switch (block->end->handler_idx) + switch (block->end->opcode) { case VKD3DSIH_RET: break; @@ -3581,7 +3797,7 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru { size_t i; - assert(current->label != 0); + VKD3D_ASSERT(current->label != 0); if (current == reference) return; @@ -3615,11 +3831,16 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) { struct vsir_block *block2 = &cfg->blocks[j]; - if (block2->label == 0) + if (block2->label == 0 || !vsir_block_dominates(block, block2)) continue; - if (vsir_block_dominates(block, block2)) - vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); + if (cfg->debug_buffer.content_size > 512) + { + TRACE("%s...\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + 
vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates: ...", block->label); + } + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); } TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); @@ -3711,7 +3932,16 @@ static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); for (k = 0; k < loop->count; ++k) + { + if (cfg->debug_buffer.content_size > 512) + { + TRACE("%s...\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop: ...", + block->label, header->label); + } vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); + } TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); @@ -3796,7 +4026,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) /* Do not count back edges. 
*/ if (cfg->loops_by_header[i] != SIZE_MAX) { - assert(in_degrees[i] > 0); + VKD3D_ASSERT(in_degrees[i] > 0); in_degrees[i] -= 1; } @@ -3882,7 +4112,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) inner_stack_item->seen_count += new_seen_count; - assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); + VKD3D_ASSERT(inner_stack_item->seen_count <= inner_stack_item->loop->count); if (inner_stack_item->seen_count != inner_stack_item->loop->count) break; @@ -3902,7 +4132,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) if (vsir_block_dominates(successor, block)) continue; - assert(in_degrees[successor->label - 1] > 0); + VKD3D_ASSERT(in_degrees[successor->label - 1] > 0); --in_degrees[successor->label - 1]; if (in_degrees[successor->label - 1] == 0) @@ -3923,7 +4153,7 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) goto fail; } - assert(sorter.stack_count == 0); + VKD3D_ASSERT(sorter.stack_count == 0); vkd3d_free(in_degrees); vkd3d_free(sorter.stack); @@ -3934,7 +4164,15 @@ static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); for (i = 0; i < cfg->order.count; ++i) + { + if (cfg->debug_buffer.content_size > 512) + { + TRACE("%s...\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order: ..."); + } vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); + } TRACE("%s\n", cfg->debug_buffer.buffer); vkd3d_string_buffer_clear(&cfg->debug_buffer); @@ -3988,12 +4226,12 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ ACTION_EXTEND, } action = ACTION_CREATE_NEW; - /* We've already contructed loop intervals for the back + /* We've already constructed loop intervals for the back * edges, there's nothing more to do. 
*/ if (vsir_block_dominates(successor, block)) continue; - assert(block->order_pos < successor->order_pos); + VKD3D_ASSERT(block->order_pos < successor->order_pos); /* Jumping from a block to the following one is always * possible, so nothing to do. */ @@ -4066,7 +4304,7 @@ static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_ { if (interval->synthetic) interval->begin = min(begin, interval->begin); - assert(begin >= interval->begin); + VKD3D_ASSERT(begin >= interval->begin); } } @@ -4119,7 +4357,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block break; } - assert(action->target != UINT_MAX); + VKD3D_ASSERT(action->target != UINT_MAX); action->jump_type = JUMP_CONTINUE; } else @@ -4141,7 +4379,7 @@ static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block if (action->target == UINT_MAX) { - assert(successor->order_pos == block->order_pos + 1); + VKD3D_ASSERT(successor->order_pos == block->order_pos + 1); action->jump_type = JUMP_NONE; } else @@ -4168,7 +4406,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) struct vsir_block *block = cfg->order.blocks[i]; struct vsir_cfg_structure *structure; - assert(stack_depth > 0); + VKD3D_ASSERT(stack_depth > 0); /* Open loop intervals. */ while (open_interval_idx < cfg->loop_interval_count) @@ -4192,7 +4430,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) structure->u.block = block; /* Generate between zero and two jump instructions. */ - switch (block->end->handler_idx) + switch (block->end->opcode) { case VKD3DSIH_BRANCH: { @@ -4227,7 +4465,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) * selection ladders. 
*/ if (action_true.successor == action_false.successor) { - assert(action_true.jump_type == action_false.jump_type); + VKD3D_ASSERT(action_true.jump_type == action_false.jump_type); } else { @@ -4243,10 +4481,10 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) struct vsir_cfg_structure_list *inner_loop_frame = stack[stack_depth - 2]; struct vsir_cfg_structure *inner_loop = &inner_loop_frame->structures[inner_loop_frame->count - 1]; - assert(inner_loop->type == STRUCTURE_TYPE_LOOP); + VKD3D_ASSERT(inner_loop->type == STRUCTURE_TYPE_LOOP); /* Otherwise, if one of the branches is - * continueing the inner loop we're inside, + * continue-ing the inner loop we're inside, * make sure it's the false branch (because it * will be optimized out later). */ if (action_true.jump_type == JUMP_CONTINUE && action_true.target == inner_loop->u.loop.idx) @@ -4260,7 +4498,7 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) action_false = tmp; } - assert(action_true.jump_type != JUMP_NONE); + VKD3D_ASSERT(action_true.jump_type != JUMP_NONE); if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) goto fail; @@ -4300,8 +4538,8 @@ static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) } } - assert(stack_depth == 0); - assert(open_interval_idx == cfg->loop_interval_count); + VKD3D_ASSERT(stack_depth == 0); + VKD3D_ASSERT(open_interval_idx == cfg->loop_interval_count); if (TRACE_ON()) vsir_cfg_dump_structured_program(cfg); @@ -4325,7 +4563,7 @@ static void vsir_cfg_remove_trailing_continue(struct vsir_cfg *cfg, && !last->u.jump.condition && last->u.jump.target == target) { --list->count; - assert(cfg->loop_intervals[target].target_count > 0); + VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); --cfg->loop_intervals[target].target_count; } } @@ -4366,7 +4604,7 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg size_t 
pos = list->count - 1; selection = &list->structures[pos]; - assert(selection->type == STRUCTURE_TYPE_SELECTION); + VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); if_break = vsir_cfg_get_trailing_break(&selection->u.selection.if_body); else_break = vsir_cfg_get_trailing_break(&selection->u.selection.else_body); @@ -4387,19 +4625,19 @@ static enum vkd3d_result vsir_cfg_move_breaks_out_of_selections(struct vsir_cfg /* Pointer `selection' could have been invalidated by the append * operation. */ selection = &list->structures[pos]; - assert(selection->type == STRUCTURE_TYPE_SELECTION); + VKD3D_ASSERT(selection->type == STRUCTURE_TYPE_SELECTION); if (if_target == max_target) { --selection->u.selection.if_body.count; - assert(cfg->loop_intervals[if_target].target_count > 0); + VKD3D_ASSERT(cfg->loop_intervals[if_target].target_count > 0); --cfg->loop_intervals[if_target].target_count; } if (else_target == max_target) { --selection->u.selection.else_body.count; - assert(cfg->loop_intervals[else_target].target_count > 0); + VKD3D_ASSERT(cfg->loop_intervals[else_target].target_count > 0); --cfg->loop_intervals[else_target].target_count; } @@ -4507,7 +4745,7 @@ static enum vkd3d_result vsir_cfg_append_loop(struct vsir_cfg *cfg, } target = trailing_break->u.jump.target; - assert(cfg->loop_intervals[target].target_count > 0); + VKD3D_ASSERT(cfg->loop_intervals[target].target_count > 0); /* If the loop is not targeted by any jump, we can remove it. 
The * trailing `break' then targets another loop, so we have to keep @@ -4674,7 +4912,7 @@ static void vsir_cfg_mark_trampolines(struct vsir_cfg *cfg, struct vsir_cfg_stru break; for (l = loop; l && l->u.loop.idx != structure->u.jump.target; l = l->u.loop.outer_loop) { - assert(l->type == STRUCTURE_TYPE_LOOP); + VKD3D_ASSERT(l->type == STRUCTURE_TYPE_LOOP); l->u.loop.needs_trampoline = true; } break; @@ -4714,7 +4952,7 @@ static void vsir_cfg_mark_launchers(struct vsir_cfg *cfg, struct vsir_cfg_struct case STRUCTURE_TYPE_JUMP: if (structure->u.jump.type != JUMP_BREAK && structure->u.jump.type != JUMP_CONTINUE) break; - assert(loop && loop->type == STRUCTURE_TYPE_LOOP); + VKD3D_ASSERT(loop && loop->type == STRUCTURE_TYPE_LOOP); if (loop->u.loop.needs_trampoline) structure->u.jump.needs_launcher = true; break; @@ -4888,14 +5126,14 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, struct vsir_cfg_emit_target *target = cfg->target; const struct vkd3d_shader_location no_loc = {0}; /* Encode the jump target as the loop index plus a bit to remember whether - * we're breaking or continueing. */ + * we're breaking or continue-ing. */ unsigned int jump_target = jump->target << 1; enum vkd3d_shader_opcode opcode; switch (jump->type) { case JUMP_CONTINUE: - /* If we're continueing the loop we're directly inside, then we can emit a + /* If we're continue-ing the loop we're directly inside, then we can emit a * `continue'. Otherwise we first have to break all the loops between here * and the loop to continue, recording our intention to continue * in the lowest bit of jump_target. 
*/ @@ -4912,7 +5150,7 @@ static enum vkd3d_result vsir_cfg_structure_list_emit_jump(struct vsir_cfg *cfg, break; case JUMP_RET: - assert(!jump->condition); + VKD3D_ASSERT(!jump->condition); opcode = VKD3DSIH_RET; break; @@ -5049,22 +5287,22 @@ static enum vkd3d_result vsir_program_structurize(struct vsir_program *program, { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: - assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); + VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); TRACE("Structurizing a non-hull shader.\n"); if ((ret = vsir_program_structurize_function(program, message_context, &target, &i)) < 0) goto fail; - assert(i == program->instructions.count); + VKD3D_ASSERT(i == program->instructions.count); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); - TRACE("Structurizing phase %u of a hull shader.\n", ins->handler_idx); + VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); + TRACE("Structurizing phase %u of a hull shader.\n", ins->opcode); target.instructions[target.ins_count++] = *ins; ++i; if ((ret = vsir_program_structurize_function(program, message_context, @@ -5222,22 +5460,22 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru { struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; - switch (ins->handler_idx) + switch (ins->opcode) { case VKD3DSIH_LABEL: - assert(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); + VKD3D_ASSERT(program->shader_version.type != VKD3D_SHADER_TYPE_HULL); TRACE("Materializing undominated SSAs in a non-hull shader.\n"); if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( program, message_context, &i)) < 0) return ret; - assert(i == program->instructions.count); + 
VKD3D_ASSERT(i == program->instructions.count); break; case VKD3DSIH_HS_CONTROL_POINT_PHASE: case VKD3DSIH_HS_FORK_PHASE: case VKD3DSIH_HS_JOIN_PHASE: - assert(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); - TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->handler_idx); + VKD3D_ASSERT(program->shader_version.type == VKD3D_SHADER_TYPE_HULL); + TRACE("Materializing undominated SSAs in phase %u of a hull shader.\n", ins->opcode); ++i; if ((ret = vsir_program_materialize_undominated_ssas_to_temps_in_function( program, message_context, &i)) < 0) @@ -5253,6 +5491,192 @@ static enum vkd3d_result vsir_program_materialize_undominated_ssas_to_temps(stru return VKD3D_OK; } +static bool find_colour_signature_idx(const struct shader_signature *signature, uint32_t *index) +{ + for (unsigned int i = 0; i < signature->element_count; ++i) + { + if (signature->elements[i].sysval_semantic == VKD3D_SHADER_SV_TARGET + && !signature->elements[i].register_index) + { + *index = i; + return true; + } + } + + return false; +} + +static enum vkd3d_result insert_alpha_test_before_ret(struct vsir_program *program, + const struct vkd3d_shader_instruction *ret, enum vkd3d_shader_comparison_func compare_func, + const struct vkd3d_shader_parameter1 *ref, uint32_t colour_signature_idx, uint32_t colour_temp, size_t *ret_pos) +{ + struct vkd3d_shader_instruction_array *instructions = &program->instructions; + size_t pos = ret - instructions->elements; + struct vkd3d_shader_instruction *ins; + + static const struct + { + enum vkd3d_shader_opcode float_opcode; + enum vkd3d_shader_opcode uint_opcode; + bool swap; + } + opcodes[] = + { + [VKD3D_SHADER_COMPARISON_FUNC_EQUAL] = {VKD3DSIH_EQO, VKD3DSIH_IEQ}, + [VKD3D_SHADER_COMPARISON_FUNC_NOT_EQUAL] = {VKD3DSIH_NEO, VKD3DSIH_INE}, + [VKD3D_SHADER_COMPARISON_FUNC_GREATER_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE}, + [VKD3D_SHADER_COMPARISON_FUNC_LESS] = {VKD3DSIH_LTO, VKD3DSIH_ULT}, + 
[VKD3D_SHADER_COMPARISON_FUNC_LESS_EQUAL] = {VKD3DSIH_GEO, VKD3DSIH_UGE, true}, + [VKD3D_SHADER_COMPARISON_FUNC_GREATER] = {VKD3DSIH_LTO, VKD3DSIH_ULT, true}, + }; + + if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) + { + if (!shader_instruction_array_insert_at(&program->instructions, pos, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + ins = &program->instructions.elements[pos]; + + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; + src_param_init_const_uint(&ins->src[0], 0); + + *ret_pos = pos + 1; + return VKD3D_OK; + } + + if (!shader_instruction_array_insert_at(&program->instructions, pos, 3)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + ins = &program->instructions.elements[pos]; + + switch (ref->data_type) + { + case VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32: + vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].float_opcode, 1, 2); + src_param_init_temp_float(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); + src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], + VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_FLOAT); + break; + + case VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32: + vsir_instruction_init_with_params(program, ins, &ret->location, opcodes[compare_func].uint_opcode, 1, 2); + src_param_init_temp_uint(&ins->src[opcodes[compare_func].swap ? 1 : 0], colour_temp); + src_param_init_parameter(&ins->src[opcodes[compare_func].swap ? 0 : 1], + VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, VKD3D_DATA_UINT); + break; + + default: + FIXME("Unhandled parameter data type %#x.\n", ref->data_type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + dst_param_init_ssa_bool(&ins->dst[0], program->ssa_count); + ins->src[opcodes[compare_func].swap ? 1 : 0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[opcodes[compare_func].swap ? 
1 : 0].swizzle = VKD3D_SHADER_SWIZZLE(W, W, W, W); + + ++ins; + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_DISCARD, 0, 1); + ins->flags = VKD3D_SHADER_CONDITIONAL_OP_Z; + src_param_init_ssa_bool(&ins->src[0], program->ssa_count); + + ++program->ssa_count; + + ++ins; + vsir_instruction_init_with_params(program, ins, &ret->location, VKD3DSIH_MOV, 1, 1); + vsir_dst_param_init(&ins->dst[0], VKD3DSPR_OUTPUT, VKD3D_DATA_FLOAT, 1); + ins->dst[0].reg.idx[0].offset = colour_signature_idx; + ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->dst[0].write_mask = program->output_signature.elements[colour_signature_idx].mask; + src_param_init_temp_float(&ins->src[0], colour_temp); + ins->src[0].reg.dimension = VSIR_DIMENSION_VEC4; + ins->src[0].swizzle = VKD3D_SHADER_NO_SWIZZLE; + + *ret_pos = pos + 3; + return VKD3D_OK; +} + +static enum vkd3d_result vsir_program_insert_alpha_test(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) +{ + const struct vkd3d_shader_parameter1 *func = NULL, *ref = NULL; + static const struct vkd3d_shader_location no_loc; + enum vkd3d_shader_comparison_func compare_func; + uint32_t colour_signature_idx, colour_temp; + struct vkd3d_shader_instruction *ins; + size_t new_pos; + int ret; + + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + return VKD3D_OK; + + if (!find_colour_signature_idx(&program->output_signature, &colour_signature_idx) + || !(program->output_signature.elements[colour_signature_idx].mask & VKD3DSP_WRITEMASK_3)) + return VKD3D_OK; + + if (!(func = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_FUNC)) + || !(ref = vsir_program_get_parameter(program, VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF))) + return VKD3D_OK; + + if (func->type != VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Unsupported alpha test function parameter type 
%#x.\n", func->type); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + if (func->data_type != VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32) + { + vkd3d_shader_error(message_context, &no_loc, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, + "Invalid alpha test function parameter data type %#x.\n", func->data_type); + return VKD3D_ERROR_INVALID_ARGUMENT; + } + compare_func = func->u.immediate_constant.u.u32; + + if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_ALWAYS) + return VKD3D_OK; + + /* We're going to be reading from the output, so we need to go + * through the whole shader and convert it to a temp. */ + + if (compare_func != VKD3D_SHADER_COMPARISON_FUNC_NEVER) + colour_temp = program->temp_count++; + + for (size_t i = 0; i < program->instructions.count; ++i) + { + ins = &program->instructions.elements[i]; + + if (vsir_instruction_is_dcl(ins)) + continue; + + if (ins->opcode == VKD3DSIH_RET) + { + if ((ret = insert_alpha_test_before_ret(program, ins, compare_func, + ref, colour_signature_idx, colour_temp, &new_pos)) < 0) + return ret; + i = new_pos; + continue; + } + + /* No need to convert it if the comparison func is NEVER; we don't + * read from the output in that case. */ + if (compare_func == VKD3D_SHADER_COMPARISON_FUNC_NEVER) + continue; + + for (size_t j = 0; j < ins->dst_count; ++j) + { + struct vkd3d_shader_dst_param *dst = &ins->dst[j]; + + /* Note we run after I/O normalization. 
*/ + if (dst->reg.type == VKD3DSPR_OUTPUT && dst->reg.idx[0].offset == colour_signature_idx) + { + dst->reg.type = VKD3DSPR_TEMP; + dst->reg.idx[0].offset = colour_temp; + } + } + } + + return VKD3D_OK; +} + struct validation_context { struct vkd3d_shader_message_context *message_context; @@ -5641,7 +6065,7 @@ static void vsir_validate_dst_count(struct validation_context *ctx, if (instruction->dst_count != count) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DEST_COUNT, "Invalid destination count %u for an instruction of type %#x, expected %u.", - instruction->dst_count, instruction->handler_idx, count); + instruction->dst_count, instruction->opcode, count); } static void vsir_validate_src_count(struct validation_context *ctx, @@ -5650,7 +6074,7 @@ static void vsir_validate_src_count(struct validation_context *ctx, if (instruction->src_count != count) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, "Invalid source count %u for an instruction of type %#x, expected %u.", - instruction->src_count, instruction->handler_idx, count); + instruction->src_count, instruction->opcode, count); } static bool vsir_validate_src_min_count(struct validation_context *ctx, @@ -5660,7 +6084,7 @@ static bool vsir_validate_src_min_count(struct validation_context *ctx, { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, "Invalid source count %u for an instruction of type %#x, expected at least %u.", - instruction->src_count, instruction->handler_idx, count); + instruction->src_count, instruction->opcode, count); return false; } @@ -5674,7 +6098,7 @@ static bool vsir_validate_src_max_count(struct validation_context *ctx, { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_SOURCE_COUNT, "Invalid source count %u for an instruction of type %#x, expected at most %u.", - instruction->src_count, instruction->handler_idx, count); + instruction->src_count, instruction->opcode, count); return false; } @@ -5697,11 +6121,11 @@ static const char 
*name_from_cf_type(enum cf_type type) static void vsir_validate_cf_type(struct validation_context *ctx, const struct vkd3d_shader_instruction *instruction, enum cf_type expected_type) { - assert(ctx->cf_type != CF_TYPE_UNKNOWN); - assert(expected_type != CF_TYPE_UNKNOWN); + VKD3D_ASSERT(ctx->cf_type != CF_TYPE_UNKNOWN); + VKD3D_ASSERT(expected_type != CF_TYPE_UNKNOWN); if (ctx->cf_type != expected_type) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x in %s shader.", - instruction->handler_idx, name_from_cf_type(ctx->cf_type)); + instruction->opcode, name_from_cf_type(ctx->cf_type)); } static void vsir_validate_instruction(struct validation_context *ctx) @@ -5718,13 +6142,13 @@ static void vsir_validate_instruction(struct validation_context *ctx) for (i = 0; i < instruction->src_count; ++i) vsir_validate_src_param(ctx, &instruction->src[i]); - if (instruction->handler_idx >= VKD3DSIH_INVALID) + if (instruction->opcode >= VKD3DSIH_INVALID) { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Invalid instruction handler %#x.", - instruction->handler_idx); + instruction->opcode); } - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_HS_DECLS: case VKD3DSIH_HS_CONTROL_POINT_PHASE: @@ -5733,12 +6157,14 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_dst_count(ctx, instruction, 0); vsir_validate_src_count(ctx, instruction, 0); if (version->type != VKD3D_SHADER_TYPE_HULL) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Phase instruction %#x is only valid in a hull shader.", - instruction->handler_idx); + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, + "Phase instruction %#x is only valid in a hull shader.", + instruction->opcode); if (ctx->depth != 0) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Phase instruction %#x must appear to top level.", - instruction->handler_idx); - ctx->phase = 
instruction->handler_idx; + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Phase instruction %#x must appear to top level.", + instruction->opcode); + ctx->phase = instruction->opcode; ctx->dcl_temps_found = false; return; @@ -5812,7 +6238,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) && ctx->phase == VKD3DSIH_INVALID) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_HANDLER, "Instruction %#x appear before any phase instruction in a hull shader.", - instruction->handler_idx); + instruction->opcode); /* We support two different control flow types in shaders: * block-based, like DXIL and SPIR-V, and structured, like D3DBC @@ -5824,7 +6250,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) * block, but need for that hasn't arisen yet, so we don't. */ if (ctx->cf_type == CF_TYPE_UNKNOWN && !vsir_instruction_is_dcl(instruction)) { - if (instruction->handler_idx == VKD3DSIH_LABEL) + if (instruction->opcode == VKD3DSIH_LABEL) ctx->cf_type = CF_TYPE_BLOCKS; else ctx->cf_type = CF_TYPE_STRUCTURED; @@ -5832,7 +6258,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) if (ctx->cf_type == CF_TYPE_BLOCKS && !vsir_instruction_is_dcl(instruction)) { - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_LABEL: if (ctx->inside_block) @@ -5844,20 +6270,22 @@ static void vsir_validate_instruction(struct validation_context *ctx) case VKD3DSIH_BRANCH: case VKD3DSIH_SWITCH_MONOLITHIC: if (!ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any block.", - instruction->handler_idx); + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Invalid instruction %#x outside any block.", + instruction->opcode); ctx->inside_block = false; break; default: if (!ctx->inside_block) - validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "Invalid instruction %#x outside any 
block.", - instruction->handler_idx); + validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Invalid instruction %#x outside any block.", + instruction->opcode); break; } } - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DCL_TEMPS: vsir_validate_dst_count(ctx, instruction, 0); @@ -5877,7 +6305,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, 1); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_IFC: @@ -5896,7 +6324,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) if (ctx->depth == 0 || ctx->blocks[ctx->depth - 1] != VKD3DSIH_IF) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, "ELSE instruction doesn't terminate IF block."); else - ctx->blocks[ctx->depth - 1] = instruction->handler_idx; + ctx->blocks[ctx->depth - 1] = instruction->opcode; break; case VKD3DSIH_ENDIF: @@ -5915,7 +6343,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, version->major <= 3 ? 
2 : 0); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_ENDLOOP: @@ -5934,7 +6362,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, 1); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_ENDREP: @@ -5953,7 +6381,7 @@ static void vsir_validate_instruction(struct validation_context *ctx) vsir_validate_src_count(ctx, instruction, 1); if (!vkd3d_array_reserve((void **)&ctx->blocks, &ctx->blocks_capacity, ctx->depth + 1, sizeof(*ctx->blocks))) return; - ctx->blocks[ctx->depth++] = instruction->handler_idx; + ctx->blocks[ctx->depth++] = instruction->opcode; break; case VKD3DSIH_ENDSWITCH: @@ -6225,7 +6653,7 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t return result; } - if ((result = vsir_program_normalise_io_registers(program)) < 0) + if ((result = vsir_program_normalise_io_registers(program, message_context)) < 0) return result; if ((result = instruction_array_normalise_flat_constants(program)) < 0) @@ -6241,6 +6669,9 @@ enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t return result; } + if ((result = vsir_program_insert_alpha_test(program, message_context)) < 0) + return result; + if (TRACE_ON()) vkd3d_shader_trace(program); diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.h b/libs/vkd3d/libs/vkd3d-shader/preproc.h index 4860cf5f90e..9806614a35b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.h +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.h @@ -141,7 +141,7 @@ void preproc_warning(struct preproc_ctx *ctx, const struct vkd3d_shader_location static 
inline struct preproc_file *preproc_get_top_file(struct preproc_ctx *ctx) { - assert(ctx->file_count); + VKD3D_ASSERT(ctx->file_count); return &ctx->file_stack[ctx->file_count - 1]; } diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.l b/libs/vkd3d/libs/vkd3d-shader/preproc.l index be50d3b9020..7fc963192cf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.l +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.l @@ -20,6 +20,7 @@ %{ +#include "preproc.h" #include "preproc.tab.h" #undef ERROR /* defined in wingdi.h */ @@ -408,7 +409,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) } ctx->last_was_eof = false; - assert(ctx->file_count); + VKD3D_ASSERT(ctx->file_count); if (!(token = preproc_lexer_lex(lval, lloc, scanner))) { ctx->last_was_eof = true; @@ -646,7 +647,7 @@ int yylex(YYSTYPE *lval, YYLTYPE *lloc, yyscan_t scanner) { struct preproc_text *current_arg = NULL; - assert(func_state->macro->arg_count); + VKD3D_ASSERT(func_state->macro->arg_count); if (func_state->arg_count < func_state->macro->arg_count) current_arg = &func_state->macro->arg_values[func_state->arg_count]; diff --git a/libs/vkd3d/libs/vkd3d-shader/preproc.y b/libs/vkd3d/libs/vkd3d-shader/preproc.y index 009c35ffb97..366e351e3b5 100644 --- a/libs/vkd3d/libs/vkd3d-shader/preproc.y +++ b/libs/vkd3d/libs/vkd3d-shader/preproc.y @@ -119,7 +119,7 @@ bool preproc_add_macro(struct preproc_ctx *ctx, const struct vkd3d_shader_locati macro->body.text = *body; macro->body.location = *body_loc; ret = rb_put(&ctx->macros, name, ¯o->entry); - assert(!ret); + VKD3D_ASSERT(!ret); return true; } diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 984a4f894f6..ed37ac5c45e 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -313,7 +313,7 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, struct vkd3d_spirv_chunk *chunk; size_t src_location = 0; - assert(list_empty(&dst_stream->inserted_chunks)); + 
VKD3D_ASSERT(list_empty(&dst_stream->inserted_chunks)); LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) src_word_count += chunk->word_count; @@ -322,16 +322,16 @@ static bool vkd3d_spirv_stream_append(struct vkd3d_spirv_stream *dst_stream, dst_stream->word_count + src_word_count, sizeof(*dst_stream->words))) return false; - assert(dst_stream->word_count + src_word_count <= dst_stream->capacity); + VKD3D_ASSERT(dst_stream->word_count + src_word_count <= dst_stream->capacity); LIST_FOR_EACH_ENTRY(chunk, &src_stream->inserted_chunks, struct vkd3d_spirv_chunk, entry) { - assert(src_location <= chunk->location); + VKD3D_ASSERT(src_location <= chunk->location); word_count = chunk->location - src_location; memcpy(&dst_stream->words[dst_stream->word_count], &src_stream->words[src_location], word_count * sizeof(*src_stream->words)); dst_stream->word_count += word_count; src_location += word_count; - assert(src_location == chunk->location); + VKD3D_ASSERT(src_location == chunk->location); memcpy(&dst_stream->words[dst_stream->word_count], chunk->words, chunk->word_count * sizeof(*chunk->words)); @@ -464,7 +464,7 @@ static void vkd3d_spirv_set_execution_model(struct vkd3d_spirv_builder *builder, static uint32_t vkd3d_spirv_opcode_word(SpvOp op, unsigned int word_count) { - assert(!(op & ~SpvOpCodeMask)); + VKD3D_ASSERT(!(op & ~SpvOpCodeMask)); return (word_count << SpvWordCountShift) | op; } @@ -538,7 +538,7 @@ static int vkd3d_spirv_declaration_compare(const void *key, const struct rb_entr return ret; if ((ret = vkd3d_u32_compare(a->parameter_count, b->parameter_count))) return ret; - assert(a->parameter_count <= ARRAY_SIZE(a->parameters)); + VKD3D_ASSERT(a->parameter_count <= ARRAY_SIZE(a->parameters)); return memcmp(&a->parameters, &b->parameters, a->parameter_count * sizeof(*a->parameters)); } @@ -554,7 +554,7 @@ static void vkd3d_spirv_insert_declaration(struct vkd3d_spirv_builder *builder, { struct vkd3d_spirv_declaration *d; - 
assert(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); + VKD3D_ASSERT(declaration->parameter_count <= ARRAY_SIZE(declaration->parameters)); if (!(d = vkd3d_malloc(sizeof(*d)))) return; @@ -823,7 +823,7 @@ static uint32_t vkd3d_spirv_build_op_tr2v(struct vkd3d_spirv_builder *builder, static void vkd3d_spirv_begin_function_stream_insertion(struct vkd3d_spirv_builder *builder, size_t location) { - assert(builder->insertion_location == ~(size_t)0); + VKD3D_ASSERT(builder->insertion_location == ~(size_t)0); if (vkd3d_spirv_stream_current_location(&builder->function_stream) == location) return; @@ -1166,7 +1166,7 @@ static uint32_t vkd3d_spirv_get_op_constant(struct vkd3d_spirv_builder *builder, static uint32_t vkd3d_spirv_build_op_constant64(struct vkd3d_spirv_builder *builder, uint32_t result_type, const uint32_t *values, unsigned int value_count) { - assert(value_count == 2); + VKD3D_ASSERT(value_count == 2); return vkd3d_spirv_build_op_trv(builder, &builder->global_stream, SpvOpConstant, result_type, values, value_count); } @@ -1583,13 +1583,13 @@ static uint32_t vkd3d_spirv_build_image_instruction(struct vkd3d_spirv_builder * unsigned int index = 0, i; uint32_t w[10]; - assert(operand_count <= ARRAY_SIZE(w)); + VKD3D_ASSERT(operand_count <= ARRAY_SIZE(w)); for (i = 0; i < operand_count; ++i) w[index++] = operands[i]; if (image_operands_mask) { - assert(index + 1 + image_operand_count <= ARRAY_SIZE(w)); + VKD3D_ASSERT(index + 1 + image_operand_count <= ARRAY_SIZE(w)); w[index++] = image_operands_mask; for (i = 0; i < image_operand_count; ++i) w[index++] = image_operands[i]; @@ -1606,9 +1606,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample(struct vkd3d_spirv_builder *bu const uint32_t operands[] = {sampled_image_id, coordinate_id}; if (op == SpvOpImageSampleExplicitLod) - assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); + VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | 
SpvImageOperandsGradMask)); else - assert(op == SpvOpImageSampleImplicitLod); + VKD3D_ASSERT(op == SpvOpImageSampleImplicitLod); return vkd3d_spirv_build_image_instruction(builder, op, result_type, operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); @@ -1621,9 +1621,9 @@ static uint32_t vkd3d_spirv_build_op_image_sample_dref(struct vkd3d_spirv_builde const uint32_t operands[] = {sampled_image_id, coordinate_id, dref_id}; if (op == SpvOpImageSampleDrefExplicitLod) - assert(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); + VKD3D_ASSERT(image_operands_mask & (SpvImageOperandsLodMask | SpvImageOperandsGradMask)); else - assert(op == SpvOpImageSampleDrefImplicitLod); + VKD3D_ASSERT(op == SpvOpImageSampleDrefImplicitLod); return vkd3d_spirv_build_image_instruction(builder, op, result_type, operands, ARRAY_SIZE(operands), image_operands_mask, image_operands, image_operand_count); @@ -1752,6 +1752,22 @@ static uint32_t vkd3d_spirv_get_op_scope_subgroup(struct vkd3d_spirv_builder *bu return vkd3d_spirv_build_once(builder, &builder->scope_subgroup_id, vkd3d_spirv_build_op_scope_subgroup); } +static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_swap(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id, uint32_t op_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadSwap, result_type, + vkd3d_spirv_get_op_scope_subgroup(builder), val_id, op_id); +} + +static uint32_t vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t val_id, uint32_t index_id) +{ + vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformQuad); + return vkd3d_spirv_build_op_tr3(builder, &builder->function_stream, SpvOpGroupNonUniformQuadBroadcast, result_type, + vkd3d_spirv_get_op_scope_subgroup(builder), 
val_id, index_id); +} + static uint32_t vkd3d_spirv_build_op_group_nonuniform_ballot(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t val_id) { @@ -1884,7 +1900,7 @@ static uint32_t vkd3d_spirv_get_type_id(struct vkd3d_spirv_builder *builder, } else { - assert(component_type != VKD3D_SHADER_COMPONENT_VOID); + VKD3D_ASSERT(component_type != VKD3D_SHADER_COMPONENT_VOID); scalar_id = vkd3d_spirv_get_type_id(builder, component_type, 1); return vkd3d_spirv_get_op_type_vector(builder, scalar_id, component_count); } @@ -2140,6 +2156,8 @@ struct vkd3d_symbol_descriptor_array unsigned int set; unsigned int binding; unsigned int push_constant_index; + bool write_only; + bool coherent; }; struct vkd3d_symbol_register_data @@ -2250,7 +2268,7 @@ static void vkd3d_symbol_make_register(struct vkd3d_symbol *symbol, case VKD3DSPR_OUTPUT: case VKD3DSPR_PATCHCONST: symbol->key.reg.idx = reg->idx_count ? reg->idx[reg->idx_count - 1].offset : ~0u; - assert(!reg->idx_count || symbol->key.reg.idx != ~0u); + VKD3D_ASSERT(!reg->idx_count || symbol->key.reg.idx != ~0u); break; case VKD3DSPR_IMMCONSTBUFFER: @@ -2377,6 +2395,7 @@ struct ssa_register_info struct spirv_compiler { struct vkd3d_spirv_builder spirv_builder; + const struct vsir_program *program; struct vkd3d_shader_message_context *message_context; struct vkd3d_shader_location location; @@ -2403,6 +2422,11 @@ struct spirv_compiler struct vkd3d_push_constant_buffer_binding *push_constants; const struct vkd3d_shader_spirv_target_info *spirv_target_info; + struct + { + uint32_t buffer_id; + } *spirv_parameter_info; + bool prolog_emitted; struct shader_signature input_signature; struct shader_signature output_signature; @@ -2490,6 +2514,8 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) vkd3d_free(compiler->push_constants); vkd3d_free(compiler->descriptor_offset_ids); + vkd3d_free(compiler->spirv_parameter_info); + vkd3d_spirv_builder_free(&compiler->spirv_builder); 
rb_destroy(&compiler->symbol_table, vkd3d_symbol_free, NULL); @@ -2513,13 +2539,10 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, uint64_t config_flags) { - const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; - const struct shader_signature *output_signature = &program->output_signature; const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; struct spirv_compiler *compiler; - unsigned int max_element_count; unsigned int i; if (!(compiler = vkd3d_malloc(sizeof(*compiler)))) @@ -2547,13 +2570,6 @@ static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *p compiler->spirv_target_info = target_info; } - max_element_count = max(output_signature->element_count, patch_constant_signature->element_count); - if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) - { - vkd3d_free(compiler); - return NULL; - } - vkd3d_spirv_builder_init(&compiler->spirv_builder, spirv_compiler_get_entry_point_name(compiler)); compiler->formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT @@ -2893,7 +2909,7 @@ static struct vkd3d_shader_descriptor_binding spirv_compiler_get_descriptor_bind if (is_uav_counter) { - assert(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); + VKD3D_ASSERT(descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV); binding_offsets = compiler->offset_info.uav_counter_offsets; for (i = 0; i < shader_interface->uav_counter_count; ++i) { @@ -3011,7 +3027,7 @@ static uint32_t spirv_compiler_get_constant(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int i; - assert(0 < component_count && component_count <= VKD3D_VEC4_SIZE); 
+ VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_VEC4_SIZE); type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); switch (component_type) @@ -3052,7 +3068,7 @@ static uint32_t spirv_compiler_get_constant64(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int i; - assert(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); + VKD3D_ASSERT(0 < component_count && component_count <= VKD3D_DVEC2_SIZE); type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); if (component_type != VKD3D_SHADER_COMPONENT_DOUBLE && component_type != VKD3D_SHADER_COMPONENT_UINT64) @@ -3274,21 +3290,6 @@ static uint32_t spirv_compiler_emit_array_variable(struct spirv_compiler *compil return vkd3d_spirv_build_op_variable(builder, stream, ptr_type_id, storage_class, 0); } -static const struct vkd3d_shader_parameter *spirv_compiler_get_shader_parameter( - struct spirv_compiler *compiler, enum vkd3d_shader_parameter_name name) -{ - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; - unsigned int i; - - for (i = 0; info && i < info->parameter_count; ++i) - { - if (info->parameters[i].name == name) - return &info->parameters[i]; - } - - return NULL; -} - static const struct vkd3d_spec_constant_info { enum vkd3d_shader_parameter_name name; @@ -3298,6 +3299,7 @@ static const struct vkd3d_spec_constant_info vkd3d_shader_parameters[] = { {VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, 1, "sample_count"}, + {VKD3D_SHADER_PARAMETER_NAME_ALPHA_TEST_REF, 0, "alpha_test_ref"}, }; static const struct vkd3d_spec_constant_info *get_spec_constant_info(enum vkd3d_shader_parameter_name name) @@ -3318,12 +3320,11 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com { if (!compiler->current_spec_constant_id) { - const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; unsigned int i, id = 0; - for (i = 
0; info && i < info->parameter_count; ++i) + for (i = 0; i < compiler->program->parameter_count; ++i) { - const struct vkd3d_shader_parameter *current = &info->parameters[i]; + const struct vkd3d_shader_parameter1 *current = &compiler->program->parameters[i]; if (current->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) id = max(current->u.specialization_constant.id + 1, id); @@ -3336,7 +3337,7 @@ static uint32_t spirv_compiler_alloc_spec_constant_id(struct spirv_compiler *com } static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compiler, - enum vkd3d_shader_parameter_name name, uint32_t spec_id) + enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const struct vkd3d_spec_constant_info *info; @@ -3345,7 +3346,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile info = get_spec_constant_info(name); default_value = info ? info->default_value : 0; - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); id = vkd3d_spirv_build_op_spec_constant(builder, type_id, default_value); vkd3d_spirv_build_op_decorate1(builder, id, SpvDecorationSpecId, spec_id); @@ -3364,7 +3365,7 @@ static uint32_t spirv_compiler_emit_spec_constant(struct spirv_compiler *compile } static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler, - enum vkd3d_shader_parameter_name name, uint32_t spec_id) + enum vkd3d_shader_parameter_name name, uint32_t spec_id, enum vkd3d_data_type type) { unsigned int i; @@ -3374,30 +3375,66 @@ static uint32_t spirv_compiler_get_spec_constant(struct spirv_compiler *compiler return compiler->spec_constants[i].id; } - return spirv_compiler_emit_spec_constant(compiler, name, spec_id); + return spirv_compiler_emit_spec_constant(compiler, name, spec_id, type); +} + +static 
uint32_t spirv_compiler_get_buffer_parameter(struct spirv_compiler *compiler, + const struct vkd3d_shader_parameter1 *parameter, enum vkd3d_data_type type) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + unsigned int index = parameter - compiler->program->parameters; + uint32_t type_id, ptr_id, ptr_type_id; + + type_id = vkd3d_spirv_get_type_id(builder, vkd3d_component_type_from_data_type(type), 1); + ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, type_id); + ptr_id = vkd3d_spirv_build_op_access_chain1(builder, ptr_type_id, + compiler->spirv_parameter_info[index].buffer_id, + spirv_compiler_get_constant_uint(compiler, 0)); + return vkd3d_spirv_build_op_load(builder, type_id, ptr_id, SpvMemoryAccessMaskNone); } -static uint32_t spirv_compiler_emit_uint_shader_parameter(struct spirv_compiler *compiler, - enum vkd3d_shader_parameter_name name) +static uint32_t spirv_compiler_emit_shader_parameter(struct spirv_compiler *compiler, + enum vkd3d_shader_parameter_name name, enum vkd3d_data_type type) { - const struct vkd3d_shader_parameter *parameter; + const struct vkd3d_shader_parameter1 *parameter; - if (!(parameter = spirv_compiler_get_shader_parameter(compiler, name))) + static const struct + { + enum vkd3d_data_type type; + } + type_map[] = + { + [VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32] = {VKD3D_DATA_FLOAT}, + [VKD3D_SHADER_PARAMETER_DATA_TYPE_UINT32] = {VKD3D_DATA_UINT}, + }; + + if (!(parameter = vsir_program_get_parameter(compiler->program, name))) { WARN("Unresolved shader parameter %#x.\n", name); goto default_parameter; } + if (type_map[parameter->data_type].type != type) + ERR("Expected data type %#x for parameter %#x, got %#x.\n", type, name, parameter->data_type); + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_IMMEDIATE_CONSTANT) - return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); + { + if (parameter->data_type == VKD3D_SHADER_PARAMETER_DATA_TYPE_FLOAT32) + 
return spirv_compiler_get_constant_float(compiler, parameter->u.immediate_constant.u.f32); + else + return spirv_compiler_get_constant_uint(compiler, parameter->u.immediate_constant.u.u32); + } + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_SPECIALIZATION_CONSTANT) - return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id); + return spirv_compiler_get_spec_constant(compiler, name, parameter->u.specialization_constant.id, type); + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) + return spirv_compiler_get_buffer_parameter(compiler, parameter, type); FIXME("Unhandled parameter type %#x.\n", parameter->type); default_parameter: return spirv_compiler_get_spec_constant(compiler, - name, spirv_compiler_alloc_spec_constant_id(compiler)); + name, spirv_compiler_alloc_spec_constant_id(compiler), type); } static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *compiler, @@ -3409,7 +3446,7 @@ static uint32_t spirv_compiler_emit_construct_vector(struct spirv_compiler *comp uint32_t type_id, result_id; unsigned int i; - assert(val_component_idx < val_component_count); + VKD3D_ASSERT(val_component_idx < val_component_count); type_id = vkd3d_spirv_get_type_id(builder, component_type, component_count); if (val_component_count == 1) @@ -3470,11 +3507,11 @@ static bool spirv_compiler_get_register_info(struct spirv_compiler *compiler, struct vkd3d_symbol reg_symbol, *symbol; struct rb_entry *entry; - assert(!register_is_constant_or_undef(reg)); + VKD3D_ASSERT(!register_is_constant_or_undef(reg)); if (reg->type == VKD3DSPR_TEMP) { - assert(reg->idx[0].offset < compiler->temp_count); + VKD3D_ASSERT(reg->idx[0].offset < compiler->temp_count); register_info->id = compiler->temp_id + reg->idx[0].offset; register_info->storage_class = SpvStorageClassPrivate; register_info->descriptor_array = NULL; @@ -3605,7 +3642,7 @@ static void spirv_compiler_emit_dereference_register(struct spirv_compiler *comp if (reg->type 
== VKD3DSPR_CONSTBUFFER) { - assert(!reg->idx[0].rel_addr); + VKD3D_ASSERT(!reg->idx[0].rel_addr); if (register_info->descriptor_array) indexes[index_count++] = spirv_compiler_get_descriptor_index(compiler, reg, register_info->descriptor_array, register_info->binding_base_idx, VKD3D_SHADER_RESOURCE_BUFFER); @@ -3723,7 +3760,7 @@ static uint32_t spirv_compiler_emit_swizzle(struct spirv_compiler *compiler, { if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) { - assert(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); + VKD3D_ASSERT(VKD3DSP_WRITEMASK_0 << vsir_swizzle_get_component(swizzle, i) == val_write_mask); components[component_idx++] = val_id; } } @@ -3748,7 +3785,7 @@ static uint32_t spirv_compiler_emit_vector_shuffle(struct spirv_compiler *compil uint32_t type_id; unsigned int i; - assert(component_count <= ARRAY_SIZE(components)); + VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); for (i = 0; i < component_count; ++i) { @@ -3771,7 +3808,7 @@ static uint32_t spirv_compiler_emit_int_to_bool(struct spirv_compiler *compiler, uint32_t type_id; SpvOp op; - assert(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); + VKD3D_ASSERT(!(condition & ~(VKD3D_SHADER_CONDITIONAL_OP_NZ | VKD3D_SHADER_CONDITIONAL_OP_Z))); type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count); op = condition & VKD3D_SHADER_CONDITIONAL_OP_Z ? 
SpvOpIEqual : SpvOpINotEqual; @@ -3901,7 +3938,7 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile uint32_t values[VKD3D_VEC4_SIZE] = {0}; unsigned int i, j; - assert(reg->type == VKD3DSPR_IMMCONST); + VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST); if (reg->dimension == VSIR_DIMENSION_SCALAR) { @@ -3929,7 +3966,7 @@ static uint32_t spirv_compiler_emit_load_constant64(struct spirv_compiler *compi uint64_t values[VKD3D_DVEC2_SIZE] = {0}; unsigned int i, j; - assert(reg->type == VKD3DSPR_IMMCONST64); + VKD3D_ASSERT(reg->type == VKD3DSPR_IMMCONST64); if (reg->dimension == VSIR_DIMENSION_SCALAR) { @@ -3956,7 +3993,7 @@ static uint32_t spirv_compiler_emit_load_undef(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; uint32_t type_id; - assert(reg->type == VKD3DSPR_UNDEF); + VKD3D_ASSERT(reg->type == VKD3DSPR_UNDEF); type_id = vkd3d_spirv_get_type_id_for_data_type(builder, reg->data_type, component_count); return vkd3d_spirv_get_op_undef(builder, type_id); @@ -3972,8 +4009,8 @@ static uint32_t spirv_compiler_emit_load_scalar(struct spirv_compiler *compiler, enum vkd3d_shader_component_type component_type; uint32_t skipped_component_mask; - assert(!register_is_constant_or_undef(reg)); - assert(vsir_write_mask_component_count(write_mask) == 1); + VKD3D_ASSERT(!register_is_constant_or_undef(reg)); + VKD3D_ASSERT(vsir_write_mask_component_count(write_mask) == 1); component_idx = vsir_write_mask_get_component_idx(write_mask); component_idx = vsir_swizzle_get_component(swizzle, component_idx); @@ -4096,8 +4133,8 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil static const struct ssa_register_info *spirv_compiler_get_ssa_register_info(const struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg) { - assert(reg->idx[0].offset < compiler->ssa_register_count); - assert(reg->idx_count == 1); + VKD3D_ASSERT(reg->idx[0].offset < 
compiler->ssa_register_count); + VKD3D_ASSERT(reg->idx_count == 1); return &compiler->ssa_register_info[reg->idx[0].offset]; } @@ -4105,7 +4142,7 @@ static void spirv_compiler_set_ssa_register_info(const struct spirv_compiler *co const struct vkd3d_shader_register *reg, uint32_t val_id) { unsigned int i = reg->idx[0].offset; - assert(i < compiler->ssa_register_count); + VKD3D_ASSERT(i < compiler->ssa_register_count); compiler->ssa_register_info[i].data_type = reg->data_type; compiler->ssa_register_info[i].id = val_id; } @@ -4125,10 +4162,10 @@ static uint32_t spirv_compiler_emit_load_ssa_reg(struct spirv_compiler *compiler if (!val_id) { /* Should only be from a missing instruction implementation. */ - assert(compiler->failed); + VKD3D_ASSERT(compiler->failed); return 0; } - assert(vkd3d_swizzle_is_scalar(swizzle, reg)); + VKD3D_ASSERT(vkd3d_swizzle_is_scalar(swizzle, reg)); reg_component_type = vkd3d_component_type_from_data_type(ssa->data_type); @@ -4172,6 +4209,8 @@ static uint32_t spirv_compiler_emit_load_reg(struct spirv_compiler *compiler, return spirv_compiler_emit_load_constant64(compiler, reg, swizzle, write_mask); else if (reg->type == VKD3DSPR_UNDEF) return spirv_compiler_emit_load_undef(compiler, reg, write_mask); + else if (reg->type == VKD3DSPR_PARAMETER) + return spirv_compiler_emit_shader_parameter(compiler, reg->idx[0].offset, reg->data_type); component_count = vsir_write_mask_component_count(write_mask); component_type = vkd3d_component_type_from_data_type(reg->data_type); @@ -4348,7 +4387,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, unsigned int i, src_idx, dst_idx; uint32_t type_id, dst_val_id; - assert(write_mask); + VKD3D_ASSERT(write_mask); component_count = vsir_write_mask_component_count(write_mask); dst_component_count = vsir_write_mask_component_count(dst_write_mask); @@ -4373,7 +4412,7 @@ static void spirv_compiler_emit_store(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, 
component_type, dst_component_count); dst_val_id = vkd3d_spirv_build_op_load(builder, type_id, dst_id, SpvMemoryAccessMaskNone); - assert(component_count <= ARRAY_SIZE(components)); + VKD3D_ASSERT(component_count <= ARRAY_SIZE(components)); for (i = 0, src_idx = 0, dst_idx = 0; dst_idx < VKD3D_VEC4_SIZE; ++dst_idx) { @@ -4402,7 +4441,7 @@ static void spirv_compiler_emit_store_reg(struct spirv_compiler *compiler, uint32_t src_write_mask = write_mask; uint32_t type_id; - assert(!register_is_constant_or_undef(reg)); + VKD3D_ASSERT(!register_is_constant_or_undef(reg)); if (reg->type == VKD3DSPR_SSA) { @@ -4461,7 +4500,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, static void spirv_compiler_emit_store_dst(struct spirv_compiler *compiler, const struct vkd3d_shader_dst_param *dst, uint32_t val_id) { - assert(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); + VKD3D_ASSERT(!(dst->modifiers & ~VKD3DSPDM_SATURATE)); if (dst->modifiers & VKD3DSPDM_SATURATE) val_id = spirv_compiler_emit_sat(compiler, &dst->reg, dst->write_mask, val_id); @@ -4893,7 +4932,7 @@ static uint32_t spirv_compiler_get_invocation_id(struct spirv_compiler *compiler { struct vkd3d_shader_register r; - assert(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); + VKD3D_ASSERT(compiler->shader_type == VKD3D_SHADER_TYPE_HULL); vsir_register_init(&r, VKD3DSPR_OUTPOINTID, VKD3D_DATA_FLOAT, 0); return spirv_compiler_get_register_id(compiler, &r); @@ -5013,7 +5052,7 @@ static uint32_t spirv_compiler_emit_builtin_variable_v(struct spirv_compiler *co unsigned int sizes[2]; uint32_t id; - assert(size_count <= ARRAY_SIZE(sizes)); + VKD3D_ASSERT(size_count <= ARRAY_SIZE(sizes)); memcpy(sizes, array_sizes, size_count * sizeof(sizes[0])); array_sizes = sizes; sizes[0] = max(sizes[0], builtin->spirv_array_size); @@ -5175,7 +5214,7 @@ static uint32_t spirv_compiler_emit_input(struct spirv_compiler *compiler, use_private_var ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, use_private_var ? 
VKD3DSP_WRITEMASK_ALL : reg_write_mask); reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; - assert(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); + VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_var || array_sizes[0] || array_sizes[1]); spirv_compiler_put_symbol(compiler, ®_symbol); vkd3d_spirv_build_op_name(builder, var_id, reg_type == VKD3DSPR_PATCHCONST ? "vpc%u" : "v%u", element_idx); @@ -5221,8 +5260,8 @@ static void spirv_compiler_emit_input_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t input_id; - assert(!reg->idx_count || !reg->idx[0].rel_addr); - assert(reg->idx_count < 2); + VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); + VKD3D_ASSERT(reg->idx_count < 2); if (!(builtin = get_spirv_builtin_for_register(reg->type))) { @@ -5356,8 +5395,8 @@ static void spirv_compiler_emit_output_register(struct spirv_compiler *compiler, uint32_t write_mask; uint32_t output_id; - assert(!reg->idx_count || !reg->idx[0].rel_addr); - assert(reg->idx_count < 2); + VKD3D_ASSERT(!reg->idx_count || !reg->idx[0].rel_addr); + VKD3D_ASSERT(reg->idx_count < 2); if (!(builtin = get_spirv_builtin_for_register(reg->type))) { @@ -5543,7 +5582,7 @@ static void spirv_compiler_emit_output(struct spirv_compiler *compiler, use_private_variable ? VKD3D_SHADER_COMPONENT_FLOAT : component_type, use_private_variable ? 
VKD3DSP_WRITEMASK_ALL : reg_write_mask); reg_symbol.info.reg.is_aggregate = array_sizes[0] || array_sizes[1]; - assert(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); + VKD3D_ASSERT(!builtin || !builtin->spirv_array_size || use_private_variable || array_sizes[0] || array_sizes[1]); spirv_compiler_put_symbol(compiler, ®_symbol); @@ -5881,7 +5920,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t function_location = spirv_compiler_get_current_function_location(compiler); vkd3d_spirv_begin_function_stream_insertion(builder, function_location); - assert(!compiler->temp_count); + VKD3D_ASSERT(!compiler->temp_count); compiler->temp_count = count; for (i = 0; i < compiler->temp_count; ++i) { @@ -5889,7 +5928,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t SpvStorageClassPrivate, VKD3D_SHADER_COMPONENT_FLOAT, VKD3D_VEC4_SIZE); if (!i) compiler->temp_id = id; - assert(id == compiler->temp_id + i); + VKD3D_ASSERT(id == compiler->temp_id + i); vkd3d_spirv_build_op_name(builder, id, "r%u", i); } @@ -5899,7 +5938,7 @@ static void spirv_compiler_emit_temps(struct spirv_compiler *compiler, uint32_t static void spirv_compiler_allocate_ssa_register_ids(struct spirv_compiler *compiler, unsigned int count) { - assert(!compiler->ssa_register_info); + VKD3D_ASSERT(!compiler->ssa_register_info); if (!(compiler->ssa_register_info = vkd3d_calloc(count, sizeof(*compiler->ssa_register_info)))) { ERR("Failed to allocate SSA register value id array, count %u.\n", count); @@ -6001,7 +6040,7 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com vkd3d_spirv_build_op_decorate1(builder, member_ids[j], SpvDecorationArrayStride, 4); descriptor_offsets_member_idx = j; compiler->descriptor_offsets_member_id = spirv_compiler_get_constant_uint(compiler, j); - assert(j == count - 1); + VKD3D_ASSERT(j == count - 1); } struct_id = 
vkd3d_spirv_build_op_type_struct(builder, member_ids, count); @@ -6041,21 +6080,54 @@ static void spirv_compiler_emit_push_constant_buffers(struct spirv_compiler *com } } +static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( + struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, + const struct vkd3d_shader_register_range *range) +{ + const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; + unsigned int register_last = (range->last == ~0u) ? range->first : range->last; + const struct vkd3d_shader_descriptor_info1 *d; + unsigned int i; + + for (i = 0; i < descriptor_info->descriptor_count; ++i) + { + d = &descriptor_info->descriptors[i]; + if (d->type == type && d->register_space == range->space && d->register_index <= range->first + && (d->count == ~0u || d->count > register_last - d->register_index)) + return d; + } + + return NULL; +} + struct vkd3d_descriptor_variable_info { const struct vkd3d_symbol *array_symbol; unsigned int binding_base_idx; }; +static void spirv_compiler_decorate_descriptor(struct spirv_compiler *compiler, + uint32_t var_id, bool write_only, bool coherent) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + if (write_only) + vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); + if (coherent) + vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); +} + static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler *compiler, SpvStorageClass storage_class, uint32_t type_id, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, enum vkd3d_shader_resource_type resource_type, - bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) + bool is_uav, bool is_uav_counter, struct vkd3d_descriptor_variable_info *var_info) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; struct 
vkd3d_descriptor_binding_address binding_address; struct vkd3d_shader_descriptor_binding binding; + const struct vkd3d_shader_descriptor_info1 *d; uint32_t array_type_id, ptr_type_id, var_id; + bool write_only = false, coherent = false; struct vkd3d_symbol symbol; struct rb_entry *entry; @@ -6063,6 +6135,14 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * resource_type, is_uav_counter, &binding_address); var_info->binding_base_idx = binding_address.binding_base_idx; + if (is_uav) + { + d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); + write_only = !(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ); + /* ROVs are implicitly globally coherent. */ + coherent = d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW); + } + if (binding.count == 1 && range->first == binding_address.binding_base_idx && range->last != ~0u && binding_address.push_constant_index == ~0u) { @@ -6072,6 +6152,7 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * spirv_compiler_emit_descriptor_binding(compiler, var_id, &binding); spirv_compiler_emit_register_debug_name(builder, var_id, reg); + spirv_compiler_decorate_descriptor(compiler, var_id, write_only, coherent); var_info->array_symbol = NULL; return var_id; @@ -6089,6 +6170,8 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * symbol.key.descriptor_array.set = binding.set; symbol.key.descriptor_array.binding = binding.binding; symbol.key.descriptor_array.push_constant_index = binding_address.push_constant_index; + symbol.key.descriptor_array.write_only = write_only; + symbol.key.descriptor_array.coherent = coherent; if ((entry = rb_get(&compiler->symbol_table, &symbol))) { var_info->array_symbol = RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); @@ -6099,6 +6182,7 @@ static uint32_t spirv_compiler_build_descriptor_variable(struct spirv_compiler * ptr_type_id, 
storage_class, 0); spirv_compiler_emit_descriptor_binding(compiler, var_id, &binding); spirv_compiler_emit_register_debug_name(builder, var_id, reg); + spirv_compiler_decorate_descriptor(compiler, var_id, write_only, coherent); symbol.id = var_id; symbol.descriptor_array = NULL; @@ -6155,7 +6239,7 @@ static void spirv_compiler_emit_cbv_declaration(struct spirv_compiler *compiler, vkd3d_spirv_build_op_name(builder, struct_id, "cb%u_struct", size); var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, struct_id, - ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, &var_info); + ®, range, VKD3D_SHADER_RESOURCE_BUFFER, false, false, &var_info); vkd3d_symbol_make_register(®_symbol, ®); vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, @@ -6212,7 +6296,7 @@ static void spirv_compiler_emit_sampler_declaration(struct spirv_compiler *compi type_id = vkd3d_spirv_get_op_type_sampler(builder); var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, - range, VKD3D_SHADER_RESOURCE_NONE, false, &var_info); + range, VKD3D_SHADER_RESOURCE_NONE, false, false, &var_info); vkd3d_symbol_make_register(®_symbol, ®); vkd3d_symbol_set_register_info(®_symbol, var_id, storage_class, @@ -6259,26 +6343,6 @@ static SpvImageFormat image_format_for_image_read(enum vkd3d_shader_component_ty } } -static const struct vkd3d_shader_descriptor_info1 *spirv_compiler_get_descriptor_info( - struct spirv_compiler *compiler, enum vkd3d_shader_descriptor_type type, - const struct vkd3d_shader_register_range *range) -{ - const struct vkd3d_shader_scan_descriptor_info1 *descriptor_info = compiler->scan_descriptor_info; - unsigned int register_last = (range->last == ~0u) ? 
range->first : range->last; - const struct vkd3d_shader_descriptor_info1 *d; - unsigned int i; - - for (i = 0; i < descriptor_info->descriptor_count; ++i) - { - d = &descriptor_info->descriptors[i]; - if (d->type == type && d->register_space == range->space && d->register_index <= range->first - && (d->count == ~0u || d->count > register_last - d->register_index)) - return d; - } - - return NULL; -} - static uint32_t spirv_compiler_get_image_type_id(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, const struct vkd3d_shader_register_range *range, const struct vkd3d_spirv_resource_type *resource_type_info, enum vkd3d_shader_component_type data_type, @@ -6457,7 +6521,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, type_id, ®, - range, resource_type, false, &var_info); + range, resource_type, is_uav, false, &var_info); if (is_uav) { @@ -6465,13 +6529,6 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp d = spirv_compiler_get_descriptor_info(compiler, VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, range); - if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); - - /* ROVs are implicitly globally coherent. 
*/ - if (d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW)) - vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); - if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) { if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) @@ -6488,7 +6545,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) { - assert(structure_stride); /* counters are valid only for structured buffers */ + VKD3D_ASSERT(structure_stride); /* counters are valid only for structured buffers */ counter_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); if (spirv_compiler_is_opengl_target(compiler)) @@ -6514,7 +6571,7 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } counter_var_id = spirv_compiler_build_descriptor_variable(compiler, storage_class, - type_id, ®, range, resource_type, true, &counter_var_info); + type_id, ®, range, resource_type, false, true, &counter_var_info); } } @@ -6831,7 +6888,7 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, uint32_t function_id, void_id, function_type_id; struct vkd3d_shader_phase *phase; - assert(compiler->phase != instruction->handler_idx); + VKD3D_ASSERT(compiler->phase != instruction->opcode); if (!is_in_default_phase(compiler)) spirv_compiler_leave_shader_phase(compiler); @@ -6843,16 +6900,16 @@ static void spirv_compiler_enter_shader_phase(struct spirv_compiler *compiler, vkd3d_spirv_build_op_function(builder, void_id, function_id, SpvFunctionControlMaskNone, function_type_id); - compiler->phase = instruction->handler_idx; + compiler->phase = instruction->opcode; spirv_compiler_emit_shader_phase_name(compiler, function_id, NULL); - phase = (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + phase = (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) ? 
&compiler->control_point_phase : &compiler->patch_constant_phase; phase->function_id = function_id; /* The insertion location must be set after the label is emitted. */ phase->function_location = 0; - if (instruction->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE) + if (instruction->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE) compiler->emit_default_control_point_phase = instruction->flags; } @@ -6908,7 +6965,7 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile input_reg.idx[1].offset = 0; input_id = spirv_compiler_get_register_id(compiler, &input_reg); - assert(input_signature->element_count == output_signature->element_count); + VKD3D_ASSERT(input_signature->element_count == output_signature->element_count); for (i = 0; i < output_signature->element_count; ++i) { const struct signature_element *output = &output_signature->elements[i]; @@ -6916,8 +6973,8 @@ static void spirv_compiler_emit_default_control_point_phase(struct spirv_compile struct vkd3d_shader_register_info output_reg_info; struct vkd3d_shader_register output_reg; - assert(input->mask == output->mask); - assert(input->component_type == output->component_type); + VKD3D_ASSERT(input->mask == output->mask); + VKD3D_ASSERT(input->component_type == output->component_type); input_reg.idx[1].offset = i; input_id = spirv_compiler_get_register_id(compiler, &input_reg); @@ -7016,7 +7073,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru { static const struct { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; SpvOp spirv_op; } alu_ops[] = @@ -7056,7 +7113,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru for (i = 0; i < ARRAY_SIZE(alu_ops); ++i) { - if (alu_ops[i].handler_idx == instruction->handler_idx) + if (alu_ops[i].opcode == instruction->opcode) return alu_ops[i].spirv_op; } @@ -7065,7 +7122,7 @@ static SpvOp spirv_compiler_map_alu_instruction(const struct vkd3d_shader_instru 
static SpvOp spirv_compiler_map_logical_instruction(const struct vkd3d_shader_instruction *instruction) { - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_AND: return SpvOpLogicalAnd; @@ -7085,25 +7142,25 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, const struct vkd3d_shader_src_param *src = instruction->src; uint32_t val_id; - assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); + VKD3D_ASSERT(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) { - val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); } else if (dst->reg.data_type == VKD3D_DATA_DOUBLE) { /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. 
*/ - val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); + val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOF); } else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) { - val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); + val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); } else if (dst->reg.data_type == VKD3D_DATA_UINT64) { - val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); + val_id = spirv_compiler_emit_bool_to_int64(compiler, 1, val_id, instruction->opcode == VKD3DSIH_ITOI); } else { @@ -7126,7 +7183,7 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil SpvOp op = SpvOpMax; unsigned int i; - if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) + if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->opcode == VKD3DSIH_COUNTBITS) { /* At least some drivers support this anyway, but if validation is enabled it will fail. */ FIXME("Unsupported 64-bit source for bit count.\n"); @@ -7142,8 +7199,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil /* VSIR supports logic ops AND/OR/XOR on bool values. 
*/ op = spirv_compiler_map_logical_instruction(instruction); } - else if (instruction->handler_idx == VKD3DSIH_ITOF || instruction->handler_idx == VKD3DSIH_UTOF - || instruction->handler_idx == VKD3DSIH_ITOI || instruction->handler_idx == VKD3DSIH_UTOU) + else if (instruction->opcode == VKD3DSIH_ITOF || instruction->opcode == VKD3DSIH_UTOF + || instruction->opcode == VKD3DSIH_ITOI || instruction->opcode == VKD3DSIH_UTOU) { /* VSIR supports cast from bool to signed/unsigned integer types and floating point types, * where bool is treated as a 1-bit integer and a signed 'true' value converts to -1. */ @@ -7158,14 +7215,14 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil if (op == SpvOpMax) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, - "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); + "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); return VKD3D_ERROR_INVALID_SHADER; } - assert(instruction->dst_count == 1); - assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); + VKD3D_ASSERT(instruction->dst_count == 1); + VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); @@ -7179,8 +7236,8 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil * Microsoft fxc will compile immediate constants larger than 5 bits. * Fixing up the constants would be more elegant, but the simplest way is * to let this handle constants too. 
*/ - if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->handler_idx == VKD3DSIH_ISHL - || instruction->handler_idx == VKD3DSIH_ISHR || instruction->handler_idx == VKD3DSIH_USHR)) + if (!(instruction->flags & VKD3DSI_SHIFT_UNMASKED) && (instruction->opcode == VKD3DSIH_ISHL + || instruction->opcode == VKD3DSIH_ISHR || instruction->opcode == VKD3DSIH_USHR)) { uint32_t mask_id = spirv_compiler_get_constant_vector(compiler, VKD3D_SHADER_COMPONENT_UINT, vsir_write_mask_component_count(dst->write_mask), 0x1f); @@ -7218,7 +7275,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( { static const struct { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; enum GLSLstd450 glsl_inst; } glsl_insts[] = @@ -7258,7 +7315,7 @@ static enum GLSLstd450 spirv_compiler_map_ext_glsl_instruction( for (i = 0; i < ARRAY_SIZE(glsl_insts); ++i) { - if (glsl_insts[i].handler_idx == instruction->handler_idx) + if (glsl_insts[i].opcode == instruction->opcode) return glsl_insts[i].glsl_inst; } @@ -7276,27 +7333,27 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp unsigned int i, component_count; enum GLSLstd450 glsl_inst; - if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI - || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) + if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->opcode == VKD3DSIH_FIRSTBIT_HI + || instruction->opcode == VKD3DSIH_FIRSTBIT_LO || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI)) { /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ - FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); + FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->opcode); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, - "64-bit source for handler %#x is not supported.", instruction->handler_idx); + "64-bit source for handler %#x is not supported.", instruction->opcode); return; } glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); if (glsl_inst == GLSLstd450Bad) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } instr_set_id = vkd3d_spirv_get_glsl_std450_instr_set(builder); - assert(instruction->dst_count == 1); - assert(instruction->src_count <= SPIRV_MAX_SRC_COUNT); + VKD3D_ASSERT(instruction->dst_count == 1); + VKD3D_ASSERT(instruction->src_count <= SPIRV_MAX_SRC_COUNT); type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); @@ -7306,8 +7363,8 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp val_id = vkd3d_spirv_build_op_ext_inst(builder, type_id, instr_set_id, glsl_inst, src_id, instruction->src_count); - if (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI - || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI) + if (instruction->opcode == VKD3DSIH_FIRSTBIT_HI + || instruction->opcode == VKD3DSIH_FIRSTBIT_SHI) { /* In D3D bits are numbered from the most significant bit. 
*/ component_count = vsir_write_mask_component_count(dst->write_mask); @@ -7415,7 +7472,7 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, if (src[0].reg.data_type != VKD3D_DATA_BOOL) { - if (instruction->handler_idx == VKD3DSIH_CMP) + if (instruction->opcode == VKD3DSIH_CMP) condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); @@ -7437,7 +7494,7 @@ static void spirv_compiler_emit_swapc(struct spirv_compiler *compiler, uint32_t condition_id, src1_id, src2_id, type_id, val_id; unsigned int component_count; - assert(dst[0].write_mask == dst[1].write_mask); + VKD3D_ASSERT(dst[0].write_mask == dst[1].write_mask); condition_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); @@ -7469,14 +7526,14 @@ static void spirv_compiler_emit_dot(struct spirv_compiler *compiler, component_count = vsir_write_mask_component_count(dst->write_mask); component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); - if (instruction->handler_idx == VKD3DSIH_DP4) + if (instruction->opcode == VKD3DSIH_DP4) write_mask = VKD3DSP_WRITEMASK_ALL; - else if (instruction->handler_idx == VKD3DSIH_DP3) + else if (instruction->opcode == VKD3DSIH_DP3) write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_2; else write_mask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1; - assert(instruction->src_count == ARRAY_SIZE(src_ids)); + VKD3D_ASSERT(instruction->src_count == ARRAY_SIZE(src_ids)); for (i = 0; i < ARRAY_SIZE(src_ids); ++i) src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], write_mask); @@ -7606,8 +7663,8 @@ static void spirv_compiler_emit_int_div(struct spirv_compiler *compiler, unsigned int component_count = 0; SpvOp div_op, 
mod_op; - div_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; - mod_op = instruction->handler_idx == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; + div_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSDiv : SpvOpUDiv; + mod_op = instruction->opcode == VKD3DSIH_IDIV ? SpvOpSRem : SpvOpUMod; if (dst[0].reg.type != VKD3DSPR_NULL) { @@ -7668,8 +7725,8 @@ static void spirv_compiler_emit_ftoi(struct spirv_compiler *compiler, enum vkd3d_shader_component_type component_type; unsigned int component_count; - assert(instruction->dst_count == 1); - assert(instruction->src_count == 1); + VKD3D_ASSERT(instruction->dst_count == 1); + VKD3D_ASSERT(instruction->src_count == 1); /* OpConvertFToI has undefined results if the result cannot be represented * as a signed integer, but Direct3D expects the result to saturate, @@ -7721,8 +7778,8 @@ static void spirv_compiler_emit_ftou(struct spirv_compiler *compiler, uint32_t src_type_id, dst_type_id, condition_type_id; unsigned int component_count; - assert(instruction->dst_count == 1); - assert(instruction->src_count == 1); + VKD3D_ASSERT(instruction->dst_count == 1); + VKD3D_ASSERT(instruction->src_count == 1); /* OpConvertFToU has undefined results if the result cannot be represented * as an unsigned integer, but Direct3D expects the result to saturate, @@ -7770,7 +7827,7 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp SpvOp op; src_count = instruction->src_count; - assert(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); + VKD3D_ASSERT(2 <= src_count && src_count <= ARRAY_SIZE(src_ids)); component_type = vkd3d_component_type_from_data_type(dst->reg.data_type); type_id = vkd3d_spirv_get_type_id(builder, component_type, 1); @@ -7778,17 +7835,17 @@ static void spirv_compiler_emit_bitfield_instruction(struct spirv_compiler *comp mask_id = spirv_compiler_get_constant_uint(compiler, size - 1); size_id = spirv_compiler_get_constant_uint(compiler, size); - switch 
(instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_BFI: op = SpvOpBitFieldInsert; break; case VKD3DSIH_IBFE: op = SpvOpBitFieldSExtract; break; case VKD3DSIH_UBFE: op = SpvOpBitFieldUExtract; break; default: - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } - assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); for (i = 0, k = 0; i < VKD3D_VEC4_SIZE; ++i) { if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) @@ -7832,7 +7889,7 @@ static void spirv_compiler_emit_f16tof32(struct spirv_compiler *compiler, scalar_type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_FLOAT, 1); /* FIXME: Consider a single UnpackHalf2x16 instruction per 2 components. */ - assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) @@ -7866,7 +7923,7 @@ static void spirv_compiler_emit_f32tof16(struct spirv_compiler *compiler, zero_id = spirv_compiler_get_constant_float(compiler, 0.0f); /* FIXME: Consider a single PackHalf2x16 instruction per 2 components. 
*/ - assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (!(write_mask = dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) @@ -7895,7 +7952,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co unsigned int component_count; SpvOp op; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DEQO: case VKD3DSIH_EQO: op = SpvOpFOrdEqual; break; @@ -7916,7 +7973,7 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co case VKD3DSIH_UGE: op = SpvOpUGreaterThanEqual; break; case VKD3DSIH_ULT: op = SpvOpULessThan; break; default: - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -7949,7 +8006,7 @@ static void spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *c src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); - if (instruction->handler_idx == VKD3DSIH_ORD) + if (instruction->opcode == VKD3DSIH_ORD) val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); spirv_compiler_emit_store_dst(compiler, dst, val_id); } @@ -7964,7 +8021,7 @@ static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compil unsigned int component_count; SpvOp op; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; @@ -8113,6 +8170,8 @@ static void spirv_compiler_emit_discard(struct spirv_compiler *compiler, if (src->reg.data_type != VKD3D_DATA_BOOL) condition_id = spirv_compiler_emit_int_to_bool(compiler, instruction->flags, src->reg.data_type, 1, condition_id); + else if (instruction->flags & 
VKD3D_SHADER_CONDITIONAL_OP_Z) + condition_id = vkd3d_spirv_build_op_logical_not(builder, vkd3d_spirv_get_op_type_bool(builder), condition_id); void_id = vkd3d_spirv_get_op_type_void(builder); vkd3d_spirv_build_op_function_call(builder, void_id, spirv_compiler_get_discard_function_id(compiler), &condition_id, 1); @@ -8262,7 +8321,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile static const struct instruction_info { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; SpvOp op; bool needs_derivative_control; } @@ -8279,7 +8338,7 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile info = NULL; for (i = 0; i < ARRAY_SIZE(deriv_instructions); ++i) { - if (deriv_instructions[i].handler_idx == instruction->handler_idx) + if (deriv_instructions[i].opcode == instruction->opcode) { info = &deriv_instructions[i]; break; @@ -8287,15 +8346,15 @@ static void spirv_compiler_emit_deriv_instruction(struct spirv_compiler *compile } if (!info) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } if (info->needs_derivative_control) vkd3d_spirv_enable_capability(builder, SpvCapabilityDerivativeControl); - assert(instruction->dst_count == 1); - assert(instruction->src_count == 1); + VKD3D_ASSERT(instruction->dst_count == 1); + VKD3D_ASSERT(instruction->src_count == 1); type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); src_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); @@ -8329,7 +8388,7 @@ static const struct vkd3d_symbol *spirv_compiler_find_resource(struct spirv_comp vkd3d_symbol_make_resource(&resource_key, resource_reg); entry = rb_get(&compiler->symbol_table, &resource_key); - assert(entry); + VKD3D_ASSERT(entry); return RB_ENTRY_VALUE(entry, struct vkd3d_symbol, entry); } @@ -8438,8 +8497,8 @@ static void spirv_compiler_prepare_image(struct spirv_compiler *compiler, { struct 
vkd3d_shader_register_info register_info; - assert(image->image_id); - assert(sampler_reg); + VKD3D_ASSERT(image->image_id); + VKD3D_ASSERT(sampler_reg); if (!spirv_compiler_get_register_info(compiler, sampler_reg, ®ister_info)) ERR("Failed to get sampler register info.\n"); @@ -8497,7 +8556,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, uint32_t coordinate_mask; bool multisample; - multisample = instruction->handler_idx == VKD3DSIH_LD2DMS; + multisample = instruction->opcode == VKD3DSIH_LD2DMS; spirv_compiler_prepare_image(compiler, &image, &src[1].reg, NULL, VKD3D_IMAGE_FLAG_NONE); @@ -8522,7 +8581,7 @@ static void spirv_compiler_emit_ld(struct spirv_compiler *compiler, image_operands[image_operand_count++] = spirv_compiler_emit_load_src(compiler, &src[2], VKD3DSP_WRITEMASK_0); } - assert(image_operand_count <= ARRAY_SIZE(image_operands)); + VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); val_id = vkd3d_spirv_build_op_image_fetch(builder, type_id, image.image_id, coordinate_id, operands_mask, image_operands, image_operand_count); @@ -8576,7 +8635,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, spirv_compiler_prepare_image(compiler, &image, &resource->reg, &sampler->reg, VKD3D_IMAGE_FLAG_SAMPLED); - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_SAMPLE: op = SpvOpImageSampleImplicitLod; @@ -8603,7 +8662,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, &src[3], VKD3DSP_WRITEMASK_0); break; default: - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -8616,7 +8675,7 @@ static void spirv_compiler_emit_sample(struct spirv_compiler *compiler, sampled_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], VKD3DSP_WRITEMASK_ALL); - assert(image_operand_count <= 
ARRAY_SIZE(image_operands)); + VKD3D_ASSERT(image_operand_count <= ARRAY_SIZE(image_operands)); val_id = vkd3d_spirv_build_op_image_sample(builder, op, sampled_type_id, image.sampled_image_id, coordinate_id, operands_mask, image_operands, image_operand_count); @@ -8637,7 +8696,7 @@ static void spirv_compiler_emit_sample_c(struct spirv_compiler *compiler, uint32_t image_operands[2]; SpvOp op; - if (instruction->handler_idx == VKD3DSIH_SAMPLE_C_LZ) + if (instruction->opcode == VKD3DSIH_SAMPLE_C_LZ) { op = SpvOpImageSampleDrefExplicitLod; operands_mask |= SpvImageOperandsLodMask; @@ -8687,12 +8746,12 @@ static void spirv_compiler_emit_gather4(struct spirv_compiler *compiler, uint32_t coordinate_mask; bool extended_offset; - if (instruction->handler_idx == VKD3DSIH_GATHER4_C - || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C) + if (instruction->opcode == VKD3DSIH_GATHER4_C + || instruction->opcode == VKD3DSIH_GATHER4_PO_C) image_flags |= VKD3D_IMAGE_FLAG_DEPTH; - extended_offset = instruction->handler_idx == VKD3DSIH_GATHER4_PO - || instruction->handler_idx == VKD3DSIH_GATHER4_PO_C; + extended_offset = instruction->opcode == VKD3DSIH_GATHER4_PO + || instruction->opcode == VKD3DSIH_GATHER4_PO_C; addr = &src[0]; offset = extended_offset ? 
&src[1] : NULL; @@ -8801,7 +8860,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler type_id, resource_symbol->info.resource.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) @@ -8833,7 +8892,7 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); texel_type_id = vkd3d_spirv_get_type_id(builder, image.sampled_type, VKD3D_VEC4_SIZE); - assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) @@ -8876,7 +8935,7 @@ static void spirv_compiler_emit_ld_tgsm(struct spirv_compiler *compiler, base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); - assert(dst->write_mask & VKD3DSP_WRITEMASK_ALL); + VKD3D_ASSERT(dst->write_mask & VKD3DSP_WRITEMASK_ALL); for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (!(dst->write_mask & (VKD3DSP_WRITEMASK_0 << i))) @@ -8939,7 +8998,7 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); data = &src[instruction->src_count - 1]; - assert(data->reg.data_type == VKD3D_DATA_UINT); + VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); component_count = vsir_write_mask_component_count(dst->write_mask); @@ -8963,12 +9022,11 @@ static void spirv_compiler_emit_store_uav_raw_structured(struct spirv_compiler * { type_id = 
vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); spirv_compiler_prepare_image(compiler, &image, &dst->reg, NULL, VKD3D_IMAGE_FLAG_NONE); - assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !image.structure_stride); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, image.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); data = &src[instruction->src_count - 1]; - assert(data->reg.data_type == VKD3D_DATA_UINT); + VKD3D_ASSERT(data->reg.data_type == VKD3D_DATA_UINT); val_id = spirv_compiler_emit_load_src(compiler, data, dst->write_mask); component_count = vsir_write_mask_component_count(dst->write_mask); @@ -9007,7 +9065,6 @@ static void spirv_compiler_emit_store_tgsm(struct spirv_compiler *compiler, type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, reg_info.storage_class, type_id); - assert((instruction->handler_idx == VKD3DSIH_STORE_STRUCTURED) != !reg_info.structure_stride); base_coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, reg_info.structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[1], VKD3DSP_WRITEMASK_0); @@ -9145,12 +9202,12 @@ static void spirv_compiler_emit_uav_counter_instruction(struct spirv_compiler *c uint32_t operands[3]; SpvOp op; - op = instruction->handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC + op = instruction->opcode == VKD3DSIH_IMM_ATOMIC_ALLOC ? 
SpvOpAtomicIIncrement : SpvOpAtomicIDecrement; resource_symbol = spirv_compiler_find_resource(compiler, &src->reg); counter_id = resource_symbol->info.resource.uav_counter_id; - assert(counter_id); + VKD3D_ASSERT(counter_id); type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); @@ -9211,7 +9268,7 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins { static const struct { - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; SpvOp spirv_op; } atomic_ops[] = @@ -9240,16 +9297,16 @@ static SpvOp spirv_compiler_map_atomic_instruction(const struct vkd3d_shader_ins for (i = 0; i < ARRAY_SIZE(atomic_ops); ++i) { - if (atomic_ops[i].handler_idx == instruction->handler_idx) + if (atomic_ops[i].opcode == instruction->opcode) return atomic_ops[i].spirv_op; } return SpvOpMax; } -static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode handler_idx) +static bool is_imm_atomic_instruction(enum vkd3d_shader_opcode opcode) { - return VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR; + return VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR; } static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compiler, @@ -9274,12 +9331,12 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil bool raw; SpvOp op; - resource = is_imm_atomic_instruction(instruction->handler_idx) ? &dst[1] : &dst[0]; + resource = is_imm_atomic_instruction(instruction->opcode) ? 
&dst[1] : &dst[0]; op = spirv_compiler_map_atomic_instruction(instruction); if (op == SpvOpMax) { - ERR("Unexpected instruction %#x.\n", instruction->handler_idx); + ERR("Unexpected instruction %#x.\n", instruction->opcode); return; } @@ -9315,14 +9372,14 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); if (structure_stride || raw) { - assert(!raw != !structure_stride); + VKD3D_ASSERT(!raw != !structure_stride); coordinate_id = spirv_compiler_emit_raw_structured_addressing(compiler, type_id, structure_stride, &src[0], VKD3DSP_WRITEMASK_0, &src[0], VKD3DSP_WRITEMASK_1); } else { - assert(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); + VKD3D_ASSERT(resource->reg.type != VKD3DSPR_GROUPSHAREDMEM); coordinate_id = spirv_compiler_emit_load_src(compiler, &src[0], coordinate_mask); } @@ -9360,7 +9417,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil { WARN("Ignoring 'volatile' attribute.\n"); spirv_compiler_warning(compiler, VKD3D_SHADER_WARNING_SPV_IGNORING_FLAG, - "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->handler_idx); + "Ignoring the 'volatile' attribute flag for atomic instruction %#x.", instruction->opcode); } memory_semantic = (instruction->flags & VKD3DARF_SEQ_CST) @@ -9379,7 +9436,7 @@ static void spirv_compiler_emit_atomic_instruction(struct spirv_compiler *compil result_id = vkd3d_spirv_build_op_trv(builder, &builder->function_stream, op, type_id, operands, i); - if (is_imm_atomic_instruction(instruction->handler_idx)) + if (is_imm_atomic_instruction(instruction->opcode)) spirv_compiler_emit_store_dst(compiler, dst, result_id); } @@ -9511,8 +9568,8 @@ static uint32_t spirv_compiler_emit_query_sample_count(struct spirv_compiler *co if (src->reg.type == VKD3DSPR_RASTERIZER) { - val_id = spirv_compiler_emit_uint_shader_parameter(compiler, - 
VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT); + val_id = spirv_compiler_emit_shader_parameter(compiler, + VKD3D_SHADER_PARAMETER_NAME_RASTERIZER_SAMPLE_COUNT, VKD3D_DATA_UINT); } else { @@ -9684,13 +9741,13 @@ static void spirv_compiler_emit_eval_attrib(struct spirv_compiler *compiler, src_ids[src_count++] = register_info.id; - if (instruction->handler_idx == VKD3DSIH_EVAL_CENTROID) + if (instruction->opcode == VKD3DSIH_EVAL_CENTROID) { op = GLSLstd450InterpolateAtCentroid; } else { - assert(instruction->handler_idx == VKD3DSIH_EVAL_SAMPLE_INDEX); + VKD3D_ASSERT(instruction->opcode == VKD3DSIH_EVAL_SAMPLE_INDEX); op = GLSLstd450InterpolateAtSample; src_ids[src_count++] = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); } @@ -9772,7 +9829,7 @@ static void spirv_compiler_emit_emit_stream(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int stream_idx; - if (instruction->handler_idx == VKD3DSIH_EMIT_STREAM) + if (instruction->opcode == VKD3DSIH_EMIT_STREAM) stream_idx = instruction->src[0].reg.idx[0].offset; else stream_idx = 0; @@ -9793,7 +9850,7 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; unsigned int stream_idx; - if (instruction->handler_idx == VKD3DSIH_CUT_STREAM) + if (instruction->opcode == VKD3DSIH_CUT_STREAM) stream_idx = instruction->src[0].reg.idx[0].offset; else stream_idx = 0; @@ -9807,9 +9864,68 @@ static void spirv_compiler_emit_cut_stream(struct spirv_compiler *compiler, vkd3d_spirv_build_op_end_primitive(builder); } -static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode handler_idx) +static uint32_t map_quad_read_across_direction(enum vkd3d_shader_opcode opcode) +{ + switch (opcode) + { + case VKD3DSIH_QUAD_READ_ACROSS_X: + return 0; + case VKD3DSIH_QUAD_READ_ACROSS_Y: + return 1; + case VKD3DSIH_QUAD_READ_ACROSS_D: + return 2; + default: + vkd3d_unreachable(); + } +} 
+ +static void spirv_compiler_emit_quad_read_across(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, direction_type_id, direction_id, val_id; + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); + direction_type_id = vkd3d_spirv_get_type_id_for_data_type(builder, VKD3D_DATA_UINT, 1); + val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + direction_id = map_quad_read_across_direction(instruction->opcode); + direction_id = vkd3d_spirv_get_op_constant(builder, direction_type_id, direction_id); + val_id = vkd3d_spirv_build_op_group_nonuniform_quad_swap(builder, type_id, val_id, direction_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_quad_read_lane_at(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, val_id, lane_id; + + if (!register_is_constant_or_undef(&src[1].reg)) + { + FIXME("Unsupported non-constant quad read lane index.\n"); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "Non-constant quad read lane indices are not supported."); + return; + } + + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, + vsir_write_mask_component_count(dst->write_mask)); + val_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + lane_id = spirv_compiler_emit_load_src(compiler, &src[1], VKD3DSP_WRITEMASK_0); + val_id = 
vkd3d_spirv_build_op_group_nonuniform_quad_broadcast(builder, type_id, val_id, lane_id); + + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static SpvOp map_wave_bool_op(enum vkd3d_shader_opcode opcode) { - switch (handler_idx) + switch (opcode) { case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: return SpvOpGroupNonUniformAllEqual; @@ -9833,7 +9949,7 @@ static void spirv_compiler_emit_wave_bool_op(struct spirv_compiler *compiler, vkd3d_spirv_enable_capability(builder, SpvCapabilityGroupNonUniformVote); - op = map_wave_bool_op(instruction->handler_idx); + op = map_wave_bool_op(instruction->opcode); type_id = vkd3d_spirv_get_op_type_bool(builder); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); val_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, @@ -9865,9 +9981,9 @@ static void spirv_compiler_emit_wave_active_ballot(struct spirv_compiler *compil spirv_compiler_emit_store_dst(compiler, dst, val_id); } -static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode handler_idx, bool is_float) +static SpvOp map_wave_alu_op(enum vkd3d_shader_opcode opcode, bool is_float) { - switch (handler_idx) + switch (opcode) { case VKD3DSIH_WAVE_ACTIVE_BIT_AND: return SpvOpGroupNonUniformBitwiseAnd; @@ -9905,7 +10021,7 @@ static void spirv_compiler_emit_wave_alu_op(struct spirv_compiler *compiler, uint32_t type_id, val_id; SpvOp op; - op = map_wave_alu_op(instruction->handler_idx, data_type_is_floating_point(src->reg.data_type)); + op = map_wave_alu_op(instruction->opcode, data_type_is_floating_point(src->reg.data_type)); type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, vsir_write_mask_component_count(dst->write_mask)); @@ -9928,7 +10044,7 @@ static void spirv_compiler_emit_wave_bit_count(struct spirv_compiler *compiler, SpvGroupOperation group_op; uint32_t type_id, val_id; - group_op = (instruction->handler_idx == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? 
SpvGroupOperationExclusiveScan + group_op = (instruction->opcode == VKD3DSIH_WAVE_PREFIX_BIT_COUNT) ? SpvGroupOperationExclusiveScan : SpvGroupOperationReduce; val_id = spirv_compiler_emit_group_nonuniform_ballot(compiler, instruction->src); @@ -10014,7 +10130,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, compiler->location = instruction->location; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DCL_GLOBAL_FLAGS: spirv_compiler_emit_dcl_global_flags(compiler, instruction); @@ -10337,6 +10453,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_CUT_STREAM: spirv_compiler_emit_cut_stream(compiler, instruction); break; + case VKD3DSIH_QUAD_READ_ACROSS_D: + case VKD3DSIH_QUAD_READ_ACROSS_X: + case VKD3DSIH_QUAD_READ_ACROSS_Y: + spirv_compiler_emit_quad_read_across(compiler, instruction); + break; + case VKD3DSIH_QUAD_READ_LANE_AT: + spirv_compiler_emit_quad_read_lane_at(compiler, instruction); + break; case VKD3DSIH_WAVE_ACTIVE_ALL_EQUAL: case VKD3DSIH_WAVE_ALL_TRUE: case VKD3DSIH_WAVE_ANY_TRUE: @@ -10371,7 +10495,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_WAVE_READ_LANE_FIRST: spirv_compiler_emit_wave_read_lane_first(compiler, instruction); break; - case VKD3DSIH_DCL: case VKD3DSIH_DCL_HS_MAX_TESSFACTOR: case VKD3DSIH_DCL_INPUT_CONTROL_POINT_COUNT: case VKD3DSIH_DCL_INPUT_SGV: @@ -10381,7 +10504,6 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_DCL_OUTPUT_SIV: case VKD3DSIH_DCL_RESOURCE_RAW: case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - case VKD3DSIH_DCL_SAMPLER: case VKD3DSIH_DCL_UAV_RAW: case VKD3DSIH_DCL_UAV_STRUCTURED: case VKD3DSIH_DCL_UAV_TYPED: @@ -10390,9 +10512,9 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, /* nothing to do */ break; default: - FIXME("Unhandled instruction %#x.\n", instruction->handler_idx); + 
FIXME("Unhandled instruction %#x.\n", instruction->opcode); spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_INVALID_HANDLER, - "Encountered invalid/unhandled instruction handler %#x.", instruction->handler_idx); + "Encountered invalid/unhandled instruction handler %#x.", instruction->opcode); break; } @@ -10476,12 +10598,16 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct struct vkd3d_shader_instruction_array instructions; enum vkd3d_shader_spirv_environment environment; enum vkd3d_result result = VKD3D_OK; - unsigned int i; + unsigned int i, max_element_count; if ((result = vsir_program_normalise(program, compiler->config_flags, compile_info, compiler->message_context)) < 0) return result; + max_element_count = max(program->output_signature.element_count, program->patch_constant_signature.element_count); + if (!(compiler->output_info = vkd3d_calloc(max_element_count, sizeof(*compiler->output_info)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + if (program->temp_count) spirv_compiler_emit_temps(compiler, program->temp_count); if (program->ssa_count) @@ -10489,9 +10615,38 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, struct spirv_compiler_emit_descriptor_declarations(compiler); + compiler->spirv_parameter_info = vkd3d_calloc(program->parameter_count, sizeof(*compiler->spirv_parameter_info)); + for (i = 0; i < program->parameter_count; ++i) + { + const struct vkd3d_shader_parameter1 *parameter = &program->parameters[i]; + + if (parameter->type == VKD3D_SHADER_PARAMETER_TYPE_BUFFER) + { + uint32_t type_id, struct_id, ptr_type_id, var_id; + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); + + struct_id = vkd3d_spirv_build_op_type_struct(builder, &type_id, 1); + vkd3d_spirv_build_op_decorate(builder, struct_id, SpvDecorationBlock, NULL, 0); + vkd3d_spirv_build_op_member_decorate1(builder, struct_id, 0, + SpvDecorationOffset, parameter->u.buffer.offset); + + ptr_type_id = 
vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, struct_id); + var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, + ptr_type_id, SpvStorageClassUniform, 0); + + vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationDescriptorSet, parameter->u.buffer.set); + vkd3d_spirv_build_op_decorate1(builder, var_id, SpvDecorationBinding, parameter->u.buffer.binding); + + compiler->spirv_parameter_info[i].buffer_id = var_id; + } + } + if (program->block_count && !spirv_compiler_init_blocks(compiler, program->block_count)) return VKD3D_ERROR_OUT_OF_MEMORY; + compiler->program = program; + instructions = program->instructions; memset(&program->instructions, 0, sizeof(program->instructions)); diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index b562e815a81..84f641cc316 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -22,6 +22,7 @@ */ #include "hlsl.h" +#include "vkd3d_shader_private.h" #define SM4_MAX_SRC_COUNT 6 #define SM4_MAX_DST_COUNT 2 @@ -780,7 +781,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui if (type != VKD3D_SM4_SHADER_DATA_IMMEDIATE_CONSTANT_BUFFER) { FIXME("Ignoring shader data type %#x.\n", type); - ins->handler_idx = VKD3DSIH_NOP; + ins->opcode = VKD3DSIH_NOP; return; } @@ -789,7 +790,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui if (icb_size % 4) { FIXME("Unexpected immediate constant buffer size %u.\n", icb_size); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } @@ -797,7 +798,7 @@ static void shader_sm4_read_shader_data(struct vkd3d_shader_instruction *ins, ui { ERR("Failed to allocate immediate constant buffer, size %u.\n", icb_size); vkd3d_shader_parser_error(&priv->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } icb->register_idx = 
0; @@ -1716,7 +1717,7 @@ static enum vkd3d_sm4_swizzle_type vkd3d_sm4_get_default_swizzle_type( const struct vkd3d_sm4_register_type_info *register_type_info = get_info_from_vkd3d_register_type(lookup, vkd3d_type); - assert(register_type_info); + VKD3D_ASSERT(register_type_info); return register_type_info->default_src_swizzle_type; } @@ -2395,16 +2396,16 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str if (!(opcode_info = get_info_from_sm4_opcode(&sm4->lookup, opcode))) { FIXME("Unrecognized opcode %#x, opcode_token 0x%08x.\n", opcode, opcode_token); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; *ptr += len; return; } vsir_instruction_init(ins, &sm4->p.location, opcode_info->handler_idx); - if (ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->handler_idx == VKD3DSIH_HS_FORK_PHASE - || ins->handler_idx == VKD3DSIH_HS_JOIN_PHASE) - sm4->phase = ins->handler_idx; - sm4->has_control_point_phase |= ins->handler_idx == VKD3DSIH_HS_CONTROL_POINT_PHASE; + if (ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE || ins->opcode == VKD3DSIH_HS_FORK_PHASE + || ins->opcode == VKD3DSIH_HS_JOIN_PHASE) + sm4->phase = ins->opcode; + sm4->has_control_point_phase |= ins->opcode == VKD3DSIH_HS_CONTROL_POINT_PHASE; ins->flags = 0; ins->coissue = false; ins->raw = false; @@ -2417,7 +2418,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str { ERR("Failed to allocate src parameters.\n"); vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } ins->resource_type = VKD3D_SHADER_RESOURCE_NONE; @@ -2459,7 +2460,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str { ERR("Failed to allocate dst parameters.\n"); vkd3d_shader_parser_error(&sm4->p, VKD3D_SHADER_ERROR_TPF_OUT_OF_MEMORY, "Out of memory."); - ins->handler_idx = VKD3DSIH_INVALID; + 
ins->opcode = VKD3DSIH_INVALID; return; } for (i = 0; i < ins->dst_count; ++i) @@ -2467,7 +2468,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str if (!(shader_sm4_read_dst_param(sm4, &p, *ptr, map_data_type(opcode_info->dst_info[i]), &dst_params[i]))) { - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } dst_params[i].modifiers |= instruction_dst_modifier; @@ -2478,7 +2479,7 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str if (!(shader_sm4_read_src_param(sm4, &p, *ptr, map_data_type(opcode_info->src_info[i]), &src_params[i]))) { - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } } @@ -2488,12 +2489,12 @@ static void shader_sm4_read_instruction(struct vkd3d_shader_sm4_parser *sm4, str fail: *ptr = sm4->end; - ins->handler_idx = VKD3DSIH_INVALID; + ins->opcode = VKD3DSIH_INVALID; return; } static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_program *program, - const uint32_t *byte_code, size_t byte_code_size, const char *source_name, + const uint32_t *byte_code, size_t byte_code_size, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_version version; @@ -2552,9 +2553,9 @@ static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, struct vsir_pro version.minor = VKD3D_SM4_VERSION_MINOR(version_token); /* Estimate instruction count to avoid reallocation in most shaders. 
*/ - if (!vsir_program_init(program, &version, token_count / 7u + 20)) + if (!vsir_program_init(program, compile_info, &version, token_count / 7u + 20)) return false; - vkd3d_shader_parser_init(&sm4->p, program, message_context, source_name); + vkd3d_shader_parser_init(&sm4->p, program, message_context, compile_info->source_name); sm4->ptr = sm4->start; init_sm4_lookup_tables(&sm4->lookup); @@ -2651,7 +2652,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con } if (!shader_sm4_init(&sm4, program, dxbc_desc.byte_code, dxbc_desc.byte_code_size, - compile_info->source_name, message_context)) + compile_info, message_context)) { WARN("Failed to initialise shader parser.\n"); free_dxbc_shader_desc(&dxbc_desc); @@ -2693,7 +2694,7 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con ins = &instructions->elements[instructions->count]; shader_sm4_read_instruction(&sm4, ins); - if (ins->handler_idx == VKD3DSIH_INVALID) + if (ins->opcode == VKD3DSIH_INVALID) { WARN("Encountered unrecognized or invalid instruction.\n"); vsir_program_cleanup(program); @@ -2762,6 +2763,7 @@ bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_sem {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_DEPTHOUT, false}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_OUTPUT, true}, + {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, VKD3DSPR_SAMPLEMASK, false}, }; for (i = 0; i < ARRAY_SIZE(register_table); ++i) @@ -2817,6 +2819,7 @@ bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semant {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET}, {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH}, + {"sv_coverage", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_COVERAGE}, {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED}, {"sv_vertexid", 
false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID}, @@ -2885,7 +2888,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, continue; ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage); - assert(ret); + VKD3D_ASSERT(ret); if (usage == ~0u) continue; usage_idx = var->semantic.index; @@ -2896,7 +2899,7 @@ static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, } else { - assert(var->regs[HLSL_REGSET_NUMERIC].allocated); + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); reg_idx = var->regs[HLSL_REGSET_NUMERIC].id; } @@ -2973,7 +2976,7 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) switch (type->class) { case HLSL_CLASS_MATRIX: - assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); + VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) return D3D_SVC_MATRIX_COLUMNS; else @@ -2984,11 +2987,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) return D3D_SVC_VECTOR; case HLSL_CLASS_ARRAY: + case HLSL_CLASS_DEPTH_STENCIL_STATE: case HLSL_CLASS_DEPTH_STENCIL_VIEW: case HLSL_CLASS_EFFECT_GROUP: case HLSL_CLASS_STRUCT: case HLSL_CLASS_PASS: case HLSL_CLASS_PIXEL_SHADER: + case HLSL_CLASS_RASTERIZER_STATE: case HLSL_CLASS_RENDER_TARGET_VIEW: case HLSL_CLASS_SAMPLER: case HLSL_CLASS_STRING: @@ -2997,6 +3002,13 @@ static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type) case HLSL_CLASS_UAV: case HLSL_CLASS_VERTEX_SHADER: case HLSL_CLASS_VOID: + case HLSL_CLASS_CONSTANT_BUFFER: + case HLSL_CLASS_COMPUTE_SHADER: + case HLSL_CLASS_DOMAIN_SHADER: + case HLSL_CLASS_HULL_SHADER: + case HLSL_CLASS_GEOMETRY_SHADER: + case HLSL_CLASS_BLEND_STATE: + case HLSL_CLASS_NULL: break; } vkd3d_unreachable(); @@ -3077,7 +3089,7 @@ static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } else { - assert(array_type->class <= HLSL_CLASS_LAST_NUMERIC); + 
VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC); type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type))); put_u32(buffer, vkd3d_make_u32(array_type->dimy, array_type->dimx)); put_u32(buffer, vkd3d_make_u32(array_size, 0)); @@ -3098,8 +3110,6 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) { switch (type->class) { - case HLSL_CLASS_ARRAY: - return sm4_resource_type(type->e.array.type); case HLSL_CLASS_SAMPLER: return D3D_SIT_SAMPLER; case HLSL_CLASS_TEXTURE: @@ -3115,9 +3125,6 @@ static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type) static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type) { - if (type->class == HLSL_CLASS_ARRAY) - return sm4_resource_format(type->e.array.type); - switch (type->e.resource.format->e.numeric.type) { case HLSL_TYPE_DOUBLE: @@ -3142,9 +3149,6 @@ static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type) { - if (type->class == HLSL_CLASS_ARRAY) - return sm4_rdef_resource_dimension(type->e.array.type); - switch (type->sampler_dim) { case HLSL_SAMPLER_DIM_1D: @@ -3178,13 +3182,21 @@ struct extern_resource /* var is only not NULL if this resource is a whole variable, so it may be responsible for more * than one component. */ const struct hlsl_ir_var *var; + const struct hlsl_buffer *buffer; char *name; - struct hlsl_type *data_type; bool is_user_packed; + /* The data type of a single component of the resource. + * This might be different from the data type of the resource itself in 4.0 + * profiles, where an array (or multi-dimensional array) is handled as a + * single resource, unlike in 5.0. 
*/ + struct hlsl_type *component_type; + enum hlsl_regset regset; - unsigned int id, bind_count; + unsigned int id, space, index, bind_count; + + struct vkd3d_shader_location loc; }; static int sm4_compare_extern_resources(const void *a, const void *b) @@ -3196,7 +3208,10 @@ static int sm4_compare_extern_resources(const void *a, const void *b) if ((r = vkd3d_u32_compare(aa->regset, bb->regset))) return r; - return vkd3d_u32_compare(aa->id, bb->id); + if ((r = vkd3d_u32_compare(aa->space, bb->space))) + return r; + + return vkd3d_u32_compare(aa->index, bb->index); } static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count) @@ -3220,6 +3235,7 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0; struct extern_resource *extern_resources = NULL; const struct hlsl_ir_var *var; + struct hlsl_buffer *buffer; enum hlsl_regset regset; size_t capacity = 0; char *name; @@ -3272,14 +3288,19 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, un hlsl_release_string_buffer(ctx, name_buffer); extern_resources[*count].var = NULL; + extern_resources[*count].buffer = NULL; extern_resources[*count].name = name; - extern_resources[*count].data_type = component_type; extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + extern_resources[*count].component_type = component_type; + extern_resources[*count].regset = regset; - extern_resources[*count].id = var->regs[regset].id + regset_offset; + extern_resources[*count].id = var->regs[regset].id; + extern_resources[*count].space = var->regs[regset].space; + extern_resources[*count].index = var->regs[regset].index + regset_offset; extern_resources[*count].bind_count = 1; + extern_resources[*count].loc = var->loc; ++*count; } @@ -3313,28 +3334,75 @@ static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, 
un } extern_resources[*count].var = var; + extern_resources[*count].buffer = NULL; extern_resources[*count].name = name; - extern_resources[*count].data_type = var->data_type; - extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type; + /* For some reason 5.1 resources aren't marked as + * user-packed, but cbuffers still are. */ + extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1) + && !!var->reg_reservation.reg_type; + + extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0); extern_resources[*count].regset = r; extern_resources[*count].id = var->regs[r].id; + extern_resources[*count].space = var->regs[r].space; + extern_resources[*count].index = var->regs[r].index; extern_resources[*count].bind_count = var->bind_count[r]; + extern_resources[*count].loc = var->loc; ++*count; } } } + LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry) + { + if (!buffer->reg.allocated) + continue; + + if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1, + sizeof(*extern_resources)))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + if (!(name = hlsl_strdup(ctx, buffer->name))) + { + sm4_free_extern_resources(extern_resources, *count); + *count = 0; + return NULL; + } + + extern_resources[*count].var = NULL; + extern_resources[*count].buffer = buffer; + + extern_resources[*count].name = name; + extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type; + + extern_resources[*count].component_type = NULL; + + extern_resources[*count].regset = HLSL_REGSET_NUMERIC; + extern_resources[*count].id = buffer->reg.id; + extern_resources[*count].space = buffer->reg.space; + extern_resources[*count].index = buffer->reg.index; + extern_resources[*count].bind_count = 1; + extern_resources[*count].loc = buffer->loc; + + ++*count; + } + qsort(extern_resources, *count, sizeof(*extern_resources), 
sm4_compare_extern_resources); return extern_resources; } static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) { - unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j; + uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t); size_t cbuffers_offset, resources_offset, creator_offset, string_offset; + unsigned int cbuffer_count = 0, extern_resources_count, i, j; size_t cbuffer_position, resource_position, creator_position; const struct hlsl_profile_info *profile = ctx->profile; struct vkd3d_bytecode_buffer buffer = {0}; @@ -3354,19 +3422,15 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); - resource_count += extern_resources_count; LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - { ++cbuffer_count; - ++resource_count; - } } put_u32(&buffer, cbuffer_count); cbuffer_position = put_u32(&buffer, 0); - put_u32(&buffer, resource_count); + put_u32(&buffer, extern_resources_count); resource_position = put_u32(&buffer, 0); put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version), target_types[profile->type])); @@ -3378,7 +3442,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11); put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */ put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */ - put_u32(&buffer, (hlsl_version_ge(ctx, 5, 1) ? 
10 : 8) * sizeof(uint32_t)); /* size of binding desc */ + put_u32(&buffer, binding_desc_size); /* size of binding desc */ put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */ put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */ put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */ @@ -3395,55 +3459,38 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) const struct extern_resource *resource = &extern_resources[i]; uint32_t flags = 0; - if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource reflection."); - if (resource->is_user_packed) flags |= D3D_SIF_USERPACKED; put_u32(&buffer, 0); /* name */ - put_u32(&buffer, sm4_resource_type(resource->data_type)); - if (resource->regset == HLSL_REGSET_SAMPLERS) - { - put_u32(&buffer, 0); - put_u32(&buffer, 0); - put_u32(&buffer, 0); - } + if (resource->buffer) + put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); else + put_u32(&buffer, sm4_resource_type(resource->component_type)); + if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS) { - unsigned int dimx = hlsl_type_get_component_type(ctx, resource->data_type, 0)->e.resource.format->dimx; + unsigned int dimx = resource->component_type->e.resource.format->dimx; - put_u32(&buffer, sm4_resource_format(resource->data_type)); - put_u32(&buffer, sm4_rdef_resource_dimension(resource->data_type)); + put_u32(&buffer, sm4_resource_format(resource->component_type)); + put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type)); put_u32(&buffer, ~0u); /* FIXME: multisample count */ flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT; } - put_u32(&buffer, resource->id); + else + { + put_u32(&buffer, 0); + put_u32(&buffer, 0); + put_u32(&buffer, 0); + } + put_u32(&buffer, resource->index); put_u32(&buffer, resource->bind_count); put_u32(&buffer, flags); - } - - 
LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - uint32_t flags = 0; - - if (!cbuffer->reg.allocated) - continue; if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource reflection."); - - if (cbuffer->reservation.reg_type) - flags |= D3D_SIF_USERPACKED; - - put_u32(&buffer, 0); /* name */ - put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER); - put_u32(&buffer, 0); /* return type */ - put_u32(&buffer, 0); /* dimension */ - put_u32(&buffer, 0); /* multisample count */ - put_u32(&buffer, cbuffer->reg.id); /* bind point */ - put_u32(&buffer, 1); /* bind count */ - put_u32(&buffer, flags); /* flags */ + { + put_u32(&buffer, resource->space); + put_u32(&buffer, resource->id); + } } for (i = 0; i < extern_resources_count; ++i) @@ -3451,16 +3498,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) const struct extern_resource *resource = &extern_resources[i]; string_offset = put_string(&buffer, resource->name); - set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset); - } - - LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) - { - if (!cbuffer->reg.allocated) - continue; - - string_offset = put_string(&buffer, cbuffer->name); - set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset); + set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset); } /* Buffers. 
*/ @@ -3522,7 +3560,7 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float)); put_u32(&buffer, flags); put_u32(&buffer, 0); /* type */ - put_u32(&buffer, 0); /* FIXME: default value */ + put_u32(&buffer, 0); /* default value */ if (profile->major_version >= 5) { @@ -3546,6 +3584,41 @@ static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) set_u32(&buffer, var_offset, string_offset); write_sm4_type(ctx, &buffer, var->data_type); set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset); + + if (var->default_values) + { + unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC]; + unsigned int comp_count = hlsl_type_component_count(var->data_type); + unsigned int default_value_offset; + unsigned int k; + + default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t)); + set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset); + + for (k = 0; k < comp_count; ++k) + { + struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k); + unsigned int comp_offset; + enum hlsl_regset regset; + + if (comp_type->class == HLSL_CLASS_STRING) + { + hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Cannot write string default value."); + continue; + } + + comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set); + if (regset == HLSL_REGSET_NUMERIC) + { + if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE) + hlsl_fixme(ctx, &var->loc, "Write double default values."); + + set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t), + var->default_values[k].number.u); + } + } + } ++j; } } @@ -3611,9 +3684,9 @@ static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_mod switch (imod->type) { case VKD3D_SM4_MODIFIER_AOFFIMMI: - assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); - assert(-8 
<= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); - assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); + VKD3D_ASSERT(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7); + VKD3D_ASSERT(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7); + VKD3D_ASSERT(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7); word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT; word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT; word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT; @@ -3652,7 +3725,7 @@ struct sm4_instruction static void sm4_register_from_node(struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_ir_node *instr) { - assert(instr->reg.allocated); + VKD3D_ASSERT(instr->reg.allocated); reg->type = VKD3DSPR_TEMP; reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = instr->reg.id; @@ -3671,7 +3744,7 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id; reg->dimension = VSIR_DIMENSION_VEC4; - assert(var->regs[HLSL_REGSET_NUMERIC].allocated); + VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated); if (!var->indexable) { @@ -3690,13 +3763,13 @@ static void sm4_numeric_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_s struct vkd3d_shader_src_param *idx_src; unsigned int idx_writemask; - assert(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); + VKD3D_ASSERT(sm4_instr->idx_src_count < ARRAY_SIZE(sm4_instr->idx_srcs)); idx_src = &sm4_instr->idx_srcs[sm4_instr->idx_src_count++]; memset(idx_src, 0, sizeof(*idx_src)); reg->idx[1].rel_addr = idx_src; sm4_register_from_node(&idx_src->reg, &idx_writemask, deref->rel_offset.node); - assert(idx_writemask != 0); + VKD3D_ASSERT(idx_writemask != 0); idx_src->swizzle = swizzle_from_sm4(hlsl_swizzle_from_writemask(idx_writemask)); } } @@ -3720,42 +3793,79 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct 
vkd3d_shader_re { reg->type = VKD3DSPR_RESOURCE; reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - assert(regset == HLSL_REGSET_TEXTURES); - reg->idx_count = 1; + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES); *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_UAVS) { reg->type = VKD3DSPR_UAV; reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - assert(regset == HLSL_REGSET_UAVS); - reg->idx_count = 1; + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_UAVS); *writemask = VKD3DSP_WRITEMASK_ALL; } else if (regset == HLSL_REGSET_SAMPLERS) { reg->type = VKD3DSPR_SAMPLER; reg->dimension = VSIR_DIMENSION_NONE; - reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; - reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); - assert(regset == HLSL_REGSET_SAMPLERS); - reg->idx_count = 1; + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id; + reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */ + reg->idx_count = 2; + } + else + { + 
reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index; + reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref); + reg->idx_count = 1; + } + VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS); *writemask = VKD3DSP_WRITEMASK_ALL; } else { unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset; - assert(data_type->class <= HLSL_CLASS_VECTOR); + VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR); reg->type = VKD3DSPR_CONSTBUFFER; reg->dimension = VSIR_DIMENSION_VEC4; - reg->idx[0].offset = var->buffer->reg.id; - reg->idx[1].offset = offset / 4; - reg->idx_count = 2; + if (hlsl_version_ge(ctx, 5, 1)) + { + reg->idx[0].offset = var->buffer->reg.id; + reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */ + reg->idx[2].offset = offset / 4; + reg->idx_count = 3; + } + else + { + reg->idx[0].offset = var->buffer->reg.index; + reg->idx[1].offset = offset / 4; + reg->idx_count = 2; + } *writemask = ((1u << data_type->dimx) - 1) << (offset & 3); } } @@ -3780,7 +3890,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - assert(hlsl_reg.allocated); + VKD3D_ASSERT(hlsl_reg.allocated); reg->type = VKD3DSPR_INPUT; reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = hlsl_reg.id; @@ -3812,7 +3922,7 @@ static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct vkd3d_shader_re { struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref); - assert(hlsl_reg.allocated); + VKD3D_ASSERT(hlsl_reg.allocated); reg->type = VKD3DSPR_OUTPUT; reg->dimension = VSIR_DIMENSION_VEC4; reg->idx[0].offset = hlsl_reg.id; @@ -3948,7 +4058,7 @@ static uint32_t sm4_encode_register(const struct tpf_writer *tpf, const struct v switch (sm4_swizzle_type) { case VKD3D_SM4_SWIZZLE_NONE: - assert(sm4_swizzle || register_is_constant(reg)); + VKD3D_ASSERT(sm4_swizzle || register_is_constant(reg)); token |= (sm4_swizzle << VKD3D_SM4_WRITEMASK_SHIFT) & 
VKD3D_SM4_WRITEMASK_MASK; break; @@ -3980,16 +4090,16 @@ static void sm4_write_register_index(const struct tpf_writer *tpf, const struct const struct vkd3d_shader_src_param *idx_src = reg->idx[j].rel_addr; uint32_t idx_src_token; - assert(idx_src); - assert(!idx_src->modifiers); - assert(idx_src->reg.type != VKD3DSPR_IMMCONST); + VKD3D_ASSERT(idx_src); + VKD3D_ASSERT(!idx_src->modifiers); + VKD3D_ASSERT(idx_src->reg.type != VKD3DSPR_IMMCONST); idx_src_token = sm4_encode_register(tpf, &idx_src->reg, VKD3D_SM4_SWIZZLE_SCALAR, idx_src->swizzle); put_u32(buffer, idx_src_token); for (k = 0; k < idx_src->reg.idx_count; ++k) { put_u32(buffer, idx_src->reg.idx[k].offset); - assert(!idx_src->reg.idx[k].rel_addr); + VKD3D_ASSERT(!idx_src->reg.idx[k].rel_addr); } } else @@ -4139,47 +4249,76 @@ static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr, static void write_sm4_dcl_constant_buffer(const struct tpf_writer *tpf, const struct hlsl_buffer *cbuffer) { - const struct sm4_instruction instr = + size_t size = (cbuffer->used_size + 3) / 4; + + struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER, .srcs[0].reg.dimension = VSIR_DIMENSION_VEC4, .srcs[0].reg.type = VKD3DSPR_CONSTBUFFER, - .srcs[0].reg.idx[0].offset = cbuffer->reg.id, - .srcs[0].reg.idx[1].offset = (cbuffer->used_size + 3) / 4, - .srcs[0].reg.idx_count = 2, .srcs[0].swizzle = VKD3D_SHADER_NO_SWIZZLE, .src_count = 1, }; + + if (hlsl_version_ge(tpf->ctx, 5, 1)) + { + instr.srcs[0].reg.idx[0].offset = cbuffer->reg.id; + instr.srcs[0].reg.idx[1].offset = cbuffer->reg.index; + instr.srcs[0].reg.idx[2].offset = cbuffer->reg.index; /* FIXME: array end */ + instr.srcs[0].reg.idx_count = 3; + + instr.idx[0] = size; + instr.idx[1] = cbuffer->reg.space; + instr.idx_count = 2; + } + else + { + instr.srcs[0].reg.idx[0].offset = cbuffer->reg.index; + instr.srcs[0].reg.idx[1].offset = size; + instr.srcs[0].reg.idx_count = 2; + } + write_sm4_instruction(tpf, &instr); } static void 
write_sm4_dcl_samplers(const struct tpf_writer *tpf, const struct extern_resource *resource) { - struct hlsl_type *component_type; unsigned int i; struct sm4_instruction instr = { .opcode = VKD3D_SM4_OP_DCL_SAMPLER, .dsts[0].reg.type = VKD3DSPR_SAMPLER, - .dsts[0].reg.idx_count = 1, .dst_count = 1, }; - component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS); - if (component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) + if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON) instr.extra_bits |= VKD3D_SM4_SAMPLER_COMPARISON << VKD3D_SM4_SAMPLER_MODE_SHIFT; - assert(resource->regset == HLSL_REGSET_SAMPLERS); - for (i = 0; i < resource->bind_count; ++i) { if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used) continue; - instr.dsts[0].reg.idx[0].offset = resource->id + i; + if (hlsl_version_ge(tpf->ctx, 5, 1)) + { + VKD3D_ASSERT(!i); + instr.dsts[0].reg.idx[0].offset = resource->id; + instr.dsts[0].reg.idx[1].offset = resource->index; + instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ + instr.dsts[0].reg.idx_count = 3; + + instr.idx[0] = resource->space; + instr.idx_count = 1; + } + else + { + instr.dsts[0].reg.idx[0].offset = resource->index + i; + instr.dsts[0].reg.idx_count = 1; + } write_sm4_instruction(tpf, &instr); } } @@ -4190,11 +4329,12 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex enum hlsl_regset regset = uav ? 
HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES; struct hlsl_type *component_type; struct sm4_instruction instr; + bool multisampled; unsigned int i; - assert(resource->regset == regset); + VKD3D_ASSERT(resource->regset == regset); - component_type = hlsl_type_get_component_type(tpf->ctx, resource->data_type, 0); + component_type = resource->component_type; for (i = 0; i < resource->bind_count; ++i) { @@ -4212,20 +4352,47 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex .idx_count = 1, }; + multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS + || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY; + + if (hlsl_version_lt(tpf->ctx, 4, 1) && multisampled && !component_type->sample_count) + { + hlsl_error(tpf->ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, + "Multisampled texture object declaration needs sample count for profile %s.", + tpf->ctx->profile->name); + } + + if (hlsl_version_ge(tpf->ctx, 5, 1)) + { + VKD3D_ASSERT(!i); + instr.dsts[0].reg.idx[0].offset = resource->id; + instr.dsts[0].reg.idx[1].offset = resource->index; + instr.dsts[0].reg.idx[2].offset = resource->index; /* FIXME: array end */ + instr.dsts[0].reg.idx_count = 3; + + instr.idx[1] = resource->space; + instr.idx_count = 2; + } + else + { + instr.dsts[0].reg.idx[0].offset = resource->index + i; + instr.dsts[0].reg.idx_count = 1; + } + if (uav) { - switch (resource->data_type->sampler_dim) + switch (component_type->sampler_dim) { - case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; - instr.byte_stride = resource->data_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; - break; - default: - instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; - break; + case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_STRUCTURED; + instr.byte_stride = component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC] * 4; + break; + default: + instr.opcode = VKD3D_SM5_OP_DCL_UAV_TYPED; 
+ break; } - if (resource->data_type->e.resource.rasteriser_ordered) + if (component_type->e.resource.rasteriser_ordered) instr.opcode |= VKD3DSUF_RASTERISER_ORDERED_VIEW << VKD3D_SM5_UAV_FLAGS_SHIFT; } else @@ -4234,11 +4401,8 @@ static void write_sm4_dcl_textures(const struct tpf_writer *tpf, const struct ex } instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT); - if (component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS - || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY) - { + if (multisampled) instr.extra_bits |= component_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT; - } write_sm4_instruction(tpf, &instr); } @@ -4449,7 +4613,7 @@ static void write_sm4_unary_op_with_two_destinations(const struct tpf_writer *tp memset(&instr, 0, sizeof(instr)); instr.opcode = opcode; - assert(dst_idx < ARRAY_SIZE(instr.dsts)); + VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); sm4_dst_from_node(&instr.dsts[dst_idx], dst); instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; @@ -4508,7 +4672,7 @@ static void write_sm4_binary_op_with_two_destinations(const struct tpf_writer *t memset(&instr, 0, sizeof(instr)); instr.opcode = opcode; - assert(dst_idx < ARRAY_SIZE(instr.dsts)); + VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts)); sm4_dst_from_node(&instr.dsts[dst_idx], dst); instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL; instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE; @@ -4706,7 +4870,7 @@ static void write_sm4_sampleinfo(const struct tpf_writer *tpf, const struct hlsl const struct hlsl_ir_node *dst = &load->node; struct sm4_instruction instr; - assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; @@ 
-4735,7 +4899,7 @@ static void write_sm4_resinfo(const struct tpf_writer *tpf, const struct hlsl_ir return; } - assert(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); + VKD3D_ASSERT(dst->data_type->e.numeric.type == HLSL_TYPE_UINT || dst->data_type->e.numeric.type == HLSL_TYPE_FLOAT); memset(&instr, 0, sizeof(instr)); instr.opcode = VKD3D_SM4_OP_RESINFO; @@ -4789,7 +4953,7 @@ static void write_sm4_cast(const struct tpf_writer *tpf, const struct hlsl_ir_ex const struct hlsl_type *src_type = arg1->data_type; /* Narrowing casts were already lowered. */ - assert(src_type->dimx == dst_type->dimx); + VKD3D_ASSERT(src_type->dimx == dst_type->dimx); switch (dst_type->e.numeric.type) { @@ -4904,6 +5068,25 @@ static void write_sm4_store_uav_typed(const struct tpf_writer *tpf, const struct write_sm4_instruction(tpf, &instr); } +static void write_sm4_rasterizer_sample_count(const struct tpf_writer *tpf, const struct hlsl_ir_node *dst) +{ + struct sm4_instruction instr; + + memset(&instr, 0, sizeof(instr)); + instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO; + instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT; + + sm4_dst_from_node(&instr.dsts[0], dst); + instr.dst_count = 1; + + instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER; + instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4; + instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X); + instr.src_count = 1; + + write_sm4_instruction(tpf, &instr); +} + static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_expr *expr) { const struct hlsl_ir_node *arg1 = expr->operands[0].node; @@ -4912,13 +5095,21 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex const struct hlsl_type *dst_type = expr->node.data_type; struct vkd3d_string_buffer *dst_type_string; - assert(expr->node.reg.allocated); + VKD3D_ASSERT(expr->node.reg.allocated); if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type))) 
return; switch (expr->op) { + case HLSL_OP0_RASTERIZER_SAMPLE_COUNT: + if (tpf->ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && hlsl_version_ge(tpf->ctx, 4, 1)) + write_sm4_rasterizer_sample_count(tpf, &expr->node); + else + hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE, + "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher."); + break; + case HLSL_OP1_ABS: switch (dst_type->e.numeric.type) { @@ -4932,7 +5123,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP1_BIT_NOT: - assert(type_is_integer(dst_type)); + VKD3D_ASSERT(type_is_integer(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); break; @@ -4941,67 +5132,73 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP1_CEIL: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0); break; case HLSL_OP1_COS: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1); break; case HLSL_OP1_DSX: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0); break; case HLSL_OP1_DSX_COARSE: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0); break; case HLSL_OP1_DSX_FINE: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0); break; case HLSL_OP1_DSY: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0); break; case HLSL_OP1_DSY_COARSE: - 
assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0); break; case HLSL_OP1_DSY_FINE: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0); break; case HLSL_OP1_EXP2: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0); break; + case HLSL_OP1_F16TOF32: + VKD3D_ASSERT(type_is_float(dst_type)); + VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0)); + write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0); + break; + case HLSL_OP1_FLOOR: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0); break; case HLSL_OP1_FRACT: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0); break; case HLSL_OP1_LOG2: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0); break; case HLSL_OP1_LOGIC_NOT: - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0); break; @@ -5022,39 +5219,77 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex } break; + case HLSL_OP1_RCP: + switch (dst_type->e.numeric.type) + { + case HLSL_TYPE_FLOAT: + /* SM5 comes with a RCP opcode */ + if (tpf->ctx->profile->major_version >= 5) + { + write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0); + } + else + { + /* For SM4, implement as DIV dst, 1.0, src */ + struct sm4_instruction instr; + struct hlsl_constant_value one; + + VKD3D_ASSERT(type_is_float(dst_type)); + + memset(&instr, 0, sizeof(instr)); + 
instr.opcode = VKD3D_SM4_OP_DIV; + + sm4_dst_from_node(&instr.dsts[0], &expr->node); + instr.dst_count = 1; + + for (unsigned int i = 0; i < 4; i++) + one.u[i].f = 1.0f; + sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask); + sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask); + instr.src_count = 2; + + write_sm4_instruction(tpf, &instr); + } + break; + + default: + hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer); + } + break; + case HLSL_OP1_REINTERPRET: write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0); break; case HLSL_OP1_ROUND: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0); break; case HLSL_OP1_RSQ: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0); break; case HLSL_OP1_SAT: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT), &expr->node, arg1, 0); break; case HLSL_OP1_SIN: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1); break; case HLSL_OP1_SQRT: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0); break; case HLSL_OP1_TRUNC: - assert(type_is_float(dst_type)); + VKD3D_ASSERT(type_is_float(dst_type)); write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0); break; @@ -5076,17 +5311,17 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex break; case HLSL_OP2_BIT_AND: - assert(type_is_integer(dst_type)); + VKD3D_ASSERT(type_is_integer(dst_type)); write_sm4_binary_op(tpf, 
VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); break; case HLSL_OP2_BIT_OR: - assert(type_is_integer(dst_type)); + VKD3D_ASSERT(type_is_integer(dst_type)); write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); break; case HLSL_OP2_BIT_XOR: - assert(type_is_integer(dst_type)); + VKD3D_ASSERT(type_is_integer(dst_type)); write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2); break; @@ -5139,7 +5374,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); switch (src_type->e.numeric.type) { @@ -5165,7 +5400,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); switch (src_type->e.numeric.type) { @@ -5194,7 +5429,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); switch (src_type->e.numeric.type) { @@ -5220,18 +5455,18 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex } case HLSL_OP2_LOGIC_AND: - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2); break; case HLSL_OP2_LOGIC_OR: - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2); break; case HLSL_OP2_LSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + 
VKD3D_ASSERT(type_is_integer(dst_type)); + VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2); break; @@ -5310,7 +5545,7 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex { const struct hlsl_type *src_type = arg1->data_type; - assert(dst_type->e.numeric.type == HLSL_TYPE_BOOL); + VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL); switch (src_type->e.numeric.type) { @@ -5333,8 +5568,8 @@ static void write_sm4_expr(const struct tpf_writer *tpf, const struct hlsl_ir_ex } case HLSL_OP2_RSHIFT: - assert(type_is_integer(dst_type)); - assert(dst_type->e.numeric.type != HLSL_TYPE_BOOL); + VKD3D_ASSERT(type_is_integer(dst_type)); + VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL); write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR, &expr->node, arg1, arg2); break; @@ -5358,7 +5593,7 @@ static void write_sm4_if(const struct tpf_writer *tpf, const struct hlsl_ir_if * .src_count = 1, }; - assert(iff->condition.node->data_type->dimx == 1); + VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1); sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL); write_sm4_instruction(tpf, &instr); @@ -5436,7 +5671,7 @@ static void write_sm4_load(const struct tpf_writer *tpf, const struct hlsl_ir_lo sm4_dst_from_node(&instr.dsts[0], &load->node); instr.dst_count = 1; - assert(hlsl_is_numeric_type(type)); + VKD3D_ASSERT(hlsl_is_numeric_type(type)); if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(tpf->ctx, load->src.var)) { struct hlsl_constant_value value; @@ -5553,7 +5788,7 @@ static void write_sm4_resource_load(const struct tpf_writer *tpf, const struct h case HLSL_RESOURCE_SAMPLE_LOD_BIAS: case HLSL_RESOURCE_SAMPLE_GRAD: /* Combined sample expressions were lowered. 
*/ - assert(load->sampler.var); + VKD3D_ASSERT(load->sampler.var); write_sm4_sample(tpf, load); break; @@ -5706,7 +5941,7 @@ static void write_sm4_block(const struct tpf_writer *tpf, const struct hlsl_bloc if (!instr->reg.allocated) { - assert(instr->type == HLSL_IR_CONSTANT); + VKD3D_ASSERT(instr->type == HLSL_IR_CONSTANT); continue; } } @@ -5799,21 +6034,13 @@ static void write_sm4_shdr(struct hlsl_ctx *ctx, LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry) { if (cbuffer->reg.allocated) - { - if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &cbuffer->loc, "Shader model 5.1 resource definition."); - write_sm4_dcl_constant_buffer(&tpf, cbuffer); - } } for (i = 0; i < extern_resources_count; ++i) { const struct extern_resource *resource = &extern_resources[i]; - if (hlsl_version_ge(ctx, 5, 1)) - hlsl_fixme(ctx, &resource->var->loc, "Shader model 5.1 resource declaration."); - if (resource->regset == HLSL_REGSET_SAMPLERS) write_sm4_dcl_samplers(&tpf, resource); else if (resource->regset == HLSL_REGSET_TEXTURES) @@ -5875,7 +6102,7 @@ static void write_sm4_sfi0(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc) extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count); for (unsigned int i = 0; i < extern_resources_count; ++i) { - if (extern_resources[i].data_type->e.resource.rasteriser_ordered) + if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered) *flags |= VKD3D_SM4_REQUIRES_ROVS; } sm4_free_extern_resources(extern_resources, extern_resources_count); diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 14a3fa778e5..3c1ffcdbee3 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -60,7 +62,7 @@ 
void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer) buffer->buffer_size = 16; buffer->content_size = 0; buffer->buffer = vkd3d_malloc(buffer->buffer_size); - assert(buffer->buffer); + VKD3D_ASSERT(buffer->buffer); memset(buffer->buffer, 0, buffer->buffer_size); } @@ -228,7 +230,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct { if (!buffer) return; - assert(cache->count + 1 <= cache->max_count); + VKD3D_ASSERT(cache->count + 1 <= cache->max_count); cache->buffers[cache->count++] = buffer; } @@ -429,7 +431,7 @@ static void bytecode_set_bytes(struct vkd3d_bytecode_buffer *buffer, size_t offs if (buffer->status) return; - assert(vkd3d_bound_range(offset, size, buffer->size)); + VKD3D_ASSERT(vkd3d_bound_range(offset, size, buffer->size)); memcpy(buffer->data + offset, value, size); } @@ -642,7 +644,7 @@ static bool vkd3d_shader_signature_from_shader_signature(struct vkd3d_shader_sig signature->element_count = src->element_count; if (!src->elements) { - assert(!signature->element_count); + VKD3D_ASSERT(!signature->element_count); signature->elements = NULL; return true; } @@ -787,7 +789,7 @@ static struct vkd3d_shader_cf_info *vkd3d_shader_scan_push_cf_info(struct vkd3d_ static void vkd3d_shader_scan_pop_cf_info(struct vkd3d_shader_scan_context *context) { - assert(context->cf_info_count); + VKD3D_ASSERT(context->cf_info_count); --context->cf_info_count; } @@ -847,12 +849,13 @@ static void vkd3d_shader_scan_add_uav_flag(const struct vkd3d_shader_scan_contex static bool vkd3d_shader_instruction_is_uav_read(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) - || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR) - || handler_idx == VKD3DSIH_LD_UAV_TYPED - || (handler_idx == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) 
- || (handler_idx == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); + enum vkd3d_shader_opcode opcode = instruction->opcode; + + return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) + || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR) + || opcode == VKD3DSIH_LD_UAV_TYPED + || (opcode == VKD3DSIH_LD_RAW && instruction->src[1].reg.type == VKD3DSPR_UAV) + || (opcode == VKD3DSIH_LD_STRUCTURED && instruction->src[2].reg.type == VKD3DSPR_UAV); } static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context *context, @@ -863,9 +866,9 @@ static void vkd3d_shader_scan_record_uav_read(struct vkd3d_shader_scan_context * static bool vkd3d_shader_instruction_is_uav_counter(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return handler_idx == VKD3DSIH_IMM_ATOMIC_ALLOC - || handler_idx == VKD3DSIH_IMM_ATOMIC_CONSUME; + enum vkd3d_shader_opcode opcode = instruction->opcode; + + return opcode == VKD3DSIH_IMM_ATOMIC_ALLOC || opcode == VKD3DSIH_IMM_ATOMIC_CONSUME; } static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_context *context, @@ -876,9 +879,10 @@ static void vkd3d_shader_scan_record_uav_counter(struct vkd3d_shader_scan_contex static bool vkd3d_shader_instruction_is_uav_atomic_op(const struct vkd3d_shader_instruction *instruction) { - enum vkd3d_shader_opcode handler_idx = instruction->handler_idx; - return (VKD3DSIH_ATOMIC_AND <= handler_idx && handler_idx <= VKD3DSIH_ATOMIC_XOR) - || (VKD3DSIH_IMM_ATOMIC_ALLOC <= handler_idx && handler_idx <= VKD3DSIH_IMM_ATOMIC_XOR); + enum vkd3d_shader_opcode opcode = instruction->opcode; + + return (VKD3DSIH_ATOMIC_AND <= opcode && opcode <= VKD3DSIH_ATOMIC_XOR) + || (VKD3DSIH_IMM_ATOMIC_ALLOC <= opcode && opcode <= VKD3DSIH_IMM_ATOMIC_XOR); } static void vkd3d_shader_scan_record_uav_atomic_op(struct vkd3d_shader_scan_context *context, @@ -1130,7 +1134,7 
@@ static int vkd3d_shader_scan_instruction(struct vkd3d_shader_scan_context *conte context->location = instruction->location; - switch (instruction->handler_idx) + switch (instruction->opcode) { case VKD3DSIH_DCL_CONSTANT_BUFFER: vkd3d_shader_scan_constant_buffer_declaration(context, instruction); @@ -2063,7 +2067,7 @@ bool shader_instruction_array_reserve(struct vkd3d_shader_instruction_array *ins bool shader_instruction_array_insert_at(struct vkd3d_shader_instruction_array *instructions, unsigned int idx, unsigned int count) { - assert(idx <= instructions->count); + VKD3D_ASSERT(idx <= instructions->count); if (!shader_instruction_array_reserve(instructions, instructions->count + count)) return false; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 29b8d6ad022..ef66a8ca07a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -51,7 +51,6 @@ #include "vkd3d_shader.h" #include "wine/list.h" -#include #include #include #include @@ -151,6 +150,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, + VKD3D_SHADER_ERROR_HLSL_INVALID_STATE_BLOCK_ENTRY = 5031, + VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL = 5032, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -455,6 +456,10 @@ enum vkd3d_shader_opcode VKD3DSIH_PHASE, VKD3DSIH_PHI, VKD3DSIH_POW, + VKD3DSIH_QUAD_READ_ACROSS_D, + VKD3DSIH_QUAD_READ_ACROSS_X, + VKD3DSIH_QUAD_READ_ACROSS_Y, + VKD3DSIH_QUAD_READ_LANE_AT, VKD3DSIH_RCP, VKD3DSIH_REP, VKD3DSIH_RESINFO, @@ -613,6 +618,7 @@ enum vkd3d_shader_register_type VKD3DSPR_SSA, VKD3DSPR_WAVELANECOUNT, VKD3DSPR_WAVELANEINDEX, + VKD3DSPR_PARAMETER, VKD3DSPR_COUNT, @@ -805,6 +811,7 @@ enum vkd3d_tessellator_domain #define VKD3DSI_NONE 0x0 
#define VKD3DSI_TEXLD_PROJECT 0x1 +#define VKD3DSI_TEXLD_BIAS 0x2 #define VKD3DSI_INDEXED_DYNAMIC 0x4 #define VKD3DSI_RESINFO_RCP_FLOAT 0x1 #define VKD3DSI_RESINFO_UINT 0x2 @@ -1189,7 +1196,7 @@ struct vkd3d_shader_location struct vkd3d_shader_instruction { struct vkd3d_shader_location location; - enum vkd3d_shader_opcode handler_idx; + enum vkd3d_shader_opcode opcode; uint32_t flags; unsigned int dst_count; unsigned int src_count; @@ -1238,8 +1245,8 @@ static inline bool vkd3d_shader_ver_le(const struct vkd3d_shader_version *v, uns return v->major < major || (v->major == major && v->minor <= minor); } -void vsir_instruction_init(struct vkd3d_shader_instruction *ins, const struct vkd3d_shader_location *location, - enum vkd3d_shader_opcode handler_idx); +void vsir_instruction_init(struct vkd3d_shader_instruction *ins, + const struct vkd3d_shader_location *location, enum vkd3d_shader_opcode opcode); static inline bool vkd3d_shader_instruction_has_texel_offset(const struct vkd3d_shader_instruction *ins) { @@ -1303,14 +1310,14 @@ void *shader_param_allocator_get(struct vkd3d_shader_param_allocator *allocator, static inline struct vkd3d_shader_src_param *shader_src_param_allocator_get( struct vkd3d_shader_param_allocator *allocator, unsigned int count) { - assert(allocator->stride == sizeof(struct vkd3d_shader_src_param)); + VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_src_param)); return shader_param_allocator_get(allocator, count); } static inline struct vkd3d_shader_dst_param *shader_dst_param_allocator_get( struct vkd3d_shader_param_allocator *allocator, unsigned int count) { - assert(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); + VKD3D_ASSERT(allocator->stride == sizeof(struct vkd3d_shader_dst_param)); return shader_param_allocator_get(allocator, count); } @@ -1355,6 +1362,10 @@ struct vsir_program struct shader_signature output_signature; struct shader_signature patch_constant_signature; + unsigned int parameter_count; + const 
struct vkd3d_shader_parameter1 *parameters; + bool free_parameters; + unsigned int input_control_point_count, output_control_point_count; unsigned int flat_constant_count[3]; unsigned int block_count; @@ -1370,7 +1381,10 @@ void vsir_program_cleanup(struct vsir_program *program); int vsir_program_compile(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); +const struct vkd3d_shader_parameter1 *vsir_program_get_parameter( + const struct vsir_program *program, enum vkd3d_shader_parameter_name name); +bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, + const struct vkd3d_shader_version *version, unsigned int reserve); enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, @@ -1663,7 +1677,7 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask { unsigned int i; - assert(write_mask); + VKD3D_ASSERT(write_mask); for (i = 0; i < VKD3D_VEC4_SIZE; ++i) { if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) @@ -1677,13 +1691,13 @@ static inline unsigned int vsir_write_mask_get_component_idx(uint32_t write_mask static inline unsigned int vsir_write_mask_component_count(uint32_t write_mask) { unsigned int count = vkd3d_popcount(write_mask & VKD3DSP_WRITEMASK_ALL); - assert(1 <= count && count <= VKD3D_VEC4_SIZE); + VKD3D_ASSERT(1 <= count && count <= VKD3D_VEC4_SIZE); return count; } static inline unsigned int vkd3d_write_mask_from_component_count(unsigned int component_count) { - assert(component_count 
<= VKD3D_VEC4_SIZE); + VKD3D_ASSERT(component_count <= VKD3D_VEC4_SIZE); return (VKD3DSP_WRITEMASK_0 << component_count) - 1; } diff --git a/libs/vkd3d/libs/vkd3d/cache.c b/libs/vkd3d/libs/vkd3d/cache.c index a0a29ed30cb..11d87ac1d98 100644 --- a/libs/vkd3d/libs/vkd3d/cache.c +++ b/libs/vkd3d/libs/vkd3d/cache.c @@ -69,7 +69,14 @@ static int vkd3d_shader_cache_compare_key(const void *key, const struct rb_entry static void vkd3d_shader_cache_add_entry(struct vkd3d_shader_cache *cache, struct shader_cache_entry *e) { - rb_put(&cache->tree, &e->h.hash, &e->entry); + const struct shader_cache_key k = + { + .hash = e->h.hash, + .key_size = e->h.key_size, + .key = e->payload + }; + + rb_put(&cache->tree, &k, &e->entry); } int vkd3d_shader_open_cache(struct vkd3d_shader_cache **cache) diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c index 95366d3441b..dcc7690876f 100644 --- a/libs/vkd3d/libs/vkd3d/command.c +++ b/libs/vkd3d/libs/vkd3d/command.c @@ -93,7 +93,7 @@ VkQueue vkd3d_queue_acquire(struct vkd3d_queue *queue) vkd3d_mutex_lock(&queue->mutex); - assert(queue->vk_queue); + VKD3D_ASSERT(queue->vk_queue); return queue->vk_queue; } @@ -423,7 +423,7 @@ static HRESULT vkd3d_fence_worker_stop(struct vkd3d_fence_worker *worker, static const struct d3d12_root_parameter *root_signature_get_parameter( const struct d3d12_root_signature *root_signature, unsigned int index) { - assert(index < root_signature->parameter_count); + VKD3D_ASSERT(index < root_signature->parameter_count); return &root_signature->parameters[index]; } @@ -431,7 +431,7 @@ static const struct d3d12_root_descriptor_table *root_signature_get_descriptor_t const struct d3d12_root_signature *root_signature, unsigned int index) { const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); - assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); + VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE); return 
&p->u.descriptor_table; } @@ -439,7 +439,7 @@ static const struct d3d12_root_constant *root_signature_get_32bit_constants( const struct d3d12_root_signature *root_signature, unsigned int index) { const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); - assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); + VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS); return &p->u.constant; } @@ -447,7 +447,7 @@ static const struct d3d12_root_parameter *root_signature_get_root_descriptor( const struct d3d12_root_signature *root_signature, unsigned int index) { const struct d3d12_root_parameter *p = root_signature_get_parameter(root_signature, index); - assert(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV + VKD3D_ASSERT(p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_SRV || p->parameter_type == D3D12_ROOT_PARAMETER_TYPE_UAV); return p; @@ -528,7 +528,7 @@ static void d3d12_fence_garbage_collect_vk_semaphores_locked(struct d3d12_fence if (current->u.binary.vk_fence) WARN("Destroying potentially pending semaphore.\n"); - assert(!current->u.binary.is_acquired); + VKD3D_ASSERT(!current->u.binary.is_acquired); VK_CALL(vkDestroySemaphore(device->vk_device, current->u.binary.vk_semaphore, NULL)); fence->semaphores[i] = fence->semaphores[--fence->semaphore_count]; @@ -599,7 +599,7 @@ static void d3d12_fence_remove_vk_semaphore(struct d3d12_fence *fence, struct vk { vkd3d_mutex_lock(&fence->mutex); - assert(semaphore->u.binary.is_acquired); + VKD3D_ASSERT(semaphore->u.binary.is_acquired); *semaphore = fence->semaphores[--fence->semaphore_count]; @@ -610,7 +610,7 @@ static void d3d12_fence_release_vk_semaphore(struct d3d12_fence *fence, struct v { vkd3d_mutex_lock(&fence->mutex); - assert(semaphore->u.binary.is_acquired); + VKD3D_ASSERT(semaphore->u.binary.is_acquired); semaphore->u.binary.is_acquired = false; vkd3d_mutex_unlock(&fence->mutex); @@ 
-1154,7 +1154,7 @@ static struct d3d12_fence *unsafe_impl_from_ID3D12Fence(ID3D12Fence *iface) if (!(iface1 = (ID3D12Fence1 *)iface)) return NULL; - assert(iface1->lpVtbl == &d3d12_fence_vtbl); + VKD3D_ASSERT(iface1->lpVtbl == &d3d12_fence_vtbl); return impl_from_ID3D12Fence1(iface1); } @@ -1792,7 +1792,7 @@ static struct d3d12_command_allocator *unsafe_impl_from_ID3D12CommandAllocator(I { if (!iface) return NULL; - assert(iface->lpVtbl == &d3d12_command_allocator_vtbl); + VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_allocator_vtbl); return impl_from_ID3D12CommandAllocator(iface); } @@ -1942,9 +1942,9 @@ static void d3d12_command_signature_decref(struct d3d12_command_signature *signa } /* ID3D12CommandList */ -static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList5(ID3D12GraphicsCommandList5 *iface) +static inline struct d3d12_command_list *impl_from_ID3D12GraphicsCommandList6(ID3D12GraphicsCommandList6 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface); + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); } static void d3d12_command_list_invalidate_current_framebuffer(struct d3d12_command_list *list) @@ -2025,7 +2025,8 @@ static void d3d12_command_list_invalidate_root_parameters(struct d3d12_command_l static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, unsigned int stencil_state, const struct d3d12_resource *resource, VkQueueFlags vk_queue_flags, const struct vkd3d_vulkan_info *vk_info, - VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout) + VkAccessFlags *access_mask, VkPipelineStageFlags *stage_flags, VkImageLayout *image_layout, + struct d3d12_device *device) { bool is_swapchain_image = resource && (resource->flags & VKD3D_RESOURCE_PRESENT_STATE_TRANSITION); VkPipelineStageFlags queue_shader_stages = 0; @@ -2033,10 +2034,12 @@ static bool 
vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, if (vk_queue_flags & VK_QUEUE_GRAPHICS_BIT) { queue_shader_stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT - | VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT - | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT - | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + if (device->vk_info.geometry_shaders) + queue_shader_stages |= VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT; + if (device->vk_info.tessellation_shaders) + queue_shader_stages |= VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT + | VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT; } if (vk_queue_flags & VK_QUEUE_COMPUTE_BIT) queue_shader_stages |= VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT; @@ -2054,7 +2057,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, { if (resource->present_state != D3D12_RESOURCE_STATE_PRESENT) return vk_barrier_parameters_from_d3d12_resource_state(resource->present_state, 0, - resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout); + resource, vk_queue_flags, vk_info, access_mask, stage_flags, image_layout, device); *access_mask = VK_ACCESS_MEMORY_READ_BIT; *stage_flags = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; @@ -2165,7 +2168,7 @@ static bool vk_barrier_parameters_from_d3d12_resource_state(unsigned int state, } /* Handle read-only states. 
*/ - assert(!is_write_resource_state(state)); + VKD3D_ASSERT(!is_write_resource_state(state)); if (state & D3D12_RESOURCE_STATE_VERTEX_AND_CONSTANT_BUFFER) { @@ -2239,7 +2242,7 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 VkPipelineStageFlags src_stage_mask, dst_stage_mask; VkImageMemoryBarrier barrier; - assert(d3d12_resource_is_texture(resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(resource)); barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; barrier.pNext = NULL; @@ -2251,7 +2254,8 @@ static void d3d12_command_list_transition_resource_to_initial_state(struct d3d12 VK_IMAGE_LAYOUT_PREINITIALIZED : VK_IMAGE_LAYOUT_UNDEFINED; if (!vk_barrier_parameters_from_d3d12_resource_state(resource->initial_state, 0, - resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, &dst_stage_mask, &barrier.newLayout)) + resource, list->vk_queue_flags, vk_info, &barrier.dstAccessMask, + &dst_stage_mask, &barrier.newLayout, list->device)) { FIXME("Unhandled state %#x.\n", resource->initial_state); return; @@ -2285,12 +2289,13 @@ static void d3d12_command_list_track_resource_usage(struct d3d12_command_list *l } } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList5 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12GraphicsCommandList6 *iface, REFIID iid, void **object) { TRACE("iface %p, iid %s, object %p.\n", iface, debugstr_guid(iid), object); - if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) + if (IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList6) + || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList5) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList4) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList3) || IsEqualGUID(iid, &IID_ID3D12GraphicsCommandList2) @@ -2301,7 +2306,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic || IsEqualGUID(iid, &IID_ID3D12Object) || IsEqualGUID(iid, &IID_IUnknown)) { - 
ID3D12GraphicsCommandList5_AddRef(iface); + ID3D12GraphicsCommandList6_AddRef(iface); *object = iface; return S_OK; } @@ -2312,9 +2317,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_QueryInterface(ID3D12Graphic return E_NOINTERFACE; } -static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList5 *iface) +static ULONG STDMETHODCALLTYPE d3d12_command_list_AddRef(ID3D12GraphicsCommandList6 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); unsigned int refcount = vkd3d_atomic_increment_u32(&list->refcount); TRACE("%p increasing refcount to %u.\n", list, refcount); @@ -2327,9 +2332,9 @@ static void vkd3d_pipeline_bindings_cleanup(struct vkd3d_pipeline_bindings *bind vkd3d_free(bindings->vk_uav_counter_views); } -static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList5 *iface) +static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandList6 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); unsigned int refcount = vkd3d_atomic_decrement_u32(&list->refcount); TRACE("%p decreasing refcount to %u.\n", list, refcount); @@ -2355,66 +2360,67 @@ static ULONG STDMETHODCALLTYPE d3d12_command_list_Release(ID3D12GraphicsCommandL return refcount; } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList5 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetPrivateData(ID3D12GraphicsCommandList6 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); return 
vkd3d_get_private_data(&list->private_store, guid, data_size, data); } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList5 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateData(ID3D12GraphicsCommandList6 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); return vkd3d_set_private_data(&list->private_store, guid, data_size, data); } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList5 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetPrivateDataInterface(ID3D12GraphicsCommandList6 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); return vkd3d_set_private_data_interface(&list->private_store, guid, data); } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList5 *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_SetName(ID3D12GraphicsCommandList6 *iface, const WCHAR *name) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, name %s.\n", iface, debugstr_w(name, list->device->wchar_size)); return name ? 
S_OK : E_INVALIDARG; } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList5 *iface, REFIID iid, void **device) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_GetDevice(ID3D12GraphicsCommandList6 *iface, + REFIID iid, void **device) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, iid %s, device %p.\n", iface, debugstr_guid(iid), device); return d3d12_device_query_interface(list->device, iid, device); } -static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList5 *iface) +static D3D12_COMMAND_LIST_TYPE STDMETHODCALLTYPE d3d12_command_list_GetType(ID3D12GraphicsCommandList6 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p.\n", iface); return list->type; } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList5 *iface) +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandList6 *iface) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; VkResult vr; @@ -2458,7 +2464,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Close(ID3D12GraphicsCommandL static void d3d12_command_list_reset_state(struct d3d12_command_list *list, ID3D12PipelineState *initial_pipeline_state) { - ID3D12GraphicsCommandList5 *iface = &list->ID3D12GraphicsCommandList5_iface; + ID3D12GraphicsCommandList6 *iface = &list->ID3D12GraphicsCommandList6_iface; memset(list->strides, 0, sizeof(list->strides)); list->primitive_topology = D3D_PRIMITIVE_TOPOLOGY_POINTLIST; @@ -2494,14 +2500,14 @@ static void d3d12_command_list_reset_state(struct 
d3d12_command_list *list, list->descriptor_heap_count = 0; - ID3D12GraphicsCommandList5_SetPipelineState(iface, initial_pipeline_state); + ID3D12GraphicsCommandList6_SetPipelineState(iface, initial_pipeline_state); } -static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList5 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandList6 *iface, ID3D12CommandAllocator *allocator, ID3D12PipelineState *initial_pipeline_state) { struct d3d12_command_allocator *allocator_impl = unsafe_impl_from_ID3D12CommandAllocator(allocator); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); HRESULT hr; TRACE("iface %p, allocator %p, initial_pipeline_state %p.\n", @@ -2528,7 +2534,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_command_list_Reset(ID3D12GraphicsCommandL return hr; } -static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearState(ID3D12GraphicsCommandList6 *iface, ID3D12PipelineState *pipeline_state) { FIXME("iface %p, pipeline_state %p stub!\n", iface, pipeline_state); @@ -2538,7 +2544,7 @@ static bool d3d12_command_list_has_depth_stencil_view(struct d3d12_command_list { struct d3d12_graphics_pipeline_state *graphics; - assert(d3d12_pipeline_state_is_graphics(list->state)); + VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(list->state)); graphics = &list->state->u.graphics; return graphics->dsv_format || (d3d12_pipeline_state_has_unknown_dsv_format(list->state) && list->dsv_format); @@ -2973,30 +2979,20 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list enum vkd3d_pipeline_bind_point bind_point) { struct vkd3d_pipeline_bindings *bindings = &list->pipeline_bindings[bind_point]; + VkWriteDescriptorSet descriptor_writes[ARRAY_SIZE(bindings->push_descriptors)] = {0}; + 
VkDescriptorBufferInfo buffer_infos[ARRAY_SIZE(bindings->push_descriptors)] = {0}; const struct d3d12_root_signature *root_signature = bindings->root_signature; - VkWriteDescriptorSet *descriptor_writes = NULL, *current_descriptor_write; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; - VkDescriptorBufferInfo *buffer_infos = NULL, *current_buffer_info; const struct d3d12_root_parameter *root_parameter; struct vkd3d_push_descriptor *push_descriptor; struct d3d12_device *device = list->device; VkDescriptorBufferInfo *vk_buffer_info; - unsigned int i, descriptor_count; + unsigned int i, descriptor_count = 0; VkBufferView *vk_buffer_view; if (!bindings->push_descriptor_dirty_mask) return; - descriptor_count = vkd3d_popcount(bindings->push_descriptor_dirty_mask); - - if (!(descriptor_writes = vkd3d_calloc(descriptor_count, sizeof(*descriptor_writes)))) - return; - if (!(buffer_infos = vkd3d_calloc(descriptor_count, sizeof(*buffer_infos)))) - goto done; - - descriptor_count = 0; - current_buffer_info = buffer_infos; - current_descriptor_write = descriptor_writes; for (i = 0; i < ARRAY_SIZE(bindings->push_descriptors); ++i) { if (!(bindings->push_descriptor_dirty_mask & (1u << i))) @@ -3008,7 +3004,7 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list if (root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV) { vk_buffer_view = NULL; - vk_buffer_info = current_buffer_info; + vk_buffer_info = &buffer_infos[descriptor_count]; vk_buffer_info->buffer = push_descriptor->u.cbv.vk_buffer; vk_buffer_info->offset = push_descriptor->u.cbv.offset; vk_buffer_info->range = VK_WHOLE_SIZE; @@ -3019,21 +3015,15 @@ static void d3d12_command_list_update_push_descriptors(struct d3d12_command_list vk_buffer_info = NULL; } - if (!vk_write_descriptor_set_from_root_descriptor(current_descriptor_write, + if (!vk_write_descriptor_set_from_root_descriptor(&descriptor_writes[descriptor_count], root_parameter, 
bindings->descriptor_sets[0], vk_buffer_view, vk_buffer_info)) continue; ++descriptor_count; - ++current_descriptor_write; - ++current_buffer_info; } VK_CALL(vkUpdateDescriptorSets(device->vk_device, descriptor_count, descriptor_writes, 0, NULL)); bindings->push_descriptor_dirty_mask = 0; - -done: - vkd3d_free(descriptor_writes); - vkd3d_free(buffer_infos); } static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_command_list *list, @@ -3063,7 +3053,7 @@ static void d3d12_command_list_update_uav_counter_descriptors(struct d3d12_comma const struct vkd3d_shader_uav_counter_binding *uav_counter = &state->uav_counters.bindings[i]; const VkBufferView *vk_uav_counter_views = bindings->vk_uav_counter_views; - assert(vk_uav_counter_views[i]); + VKD3D_ASSERT(vk_uav_counter_views[i]); vk_descriptor_writes[i].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; vk_descriptor_writes[i].pNext = NULL; @@ -3336,7 +3326,7 @@ static bool d3d12_command_list_begin_render_pass(struct d3d12_command_list *list return true; vk_render_pass = list->pso_render_pass; - assert(vk_render_pass); + VKD3D_ASSERT(vk_render_pass); begin_desc.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO; begin_desc.pNext = NULL; @@ -3392,11 +3382,11 @@ static void d3d12_command_list_check_index_buffer_strip_cut_value(struct d3d12_c } } -static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DrawInstanced(ID3D12GraphicsCommandList6 *iface, UINT vertex_count_per_instance, UINT instance_count, UINT start_vertex_location, UINT start_instance_location) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; TRACE("iface %p, vertex_count_per_instance %u, instance_count %u, " @@ -3416,11 +3406,11 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_DrawInstanced(ID3D12GraphicsCom instance_count, start_vertex_location, start_instance_location)); } -static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12GraphicsCommandList6 *iface, UINT index_count_per_instance, UINT instance_count, UINT start_vertex_location, INT base_vertex_location, UINT start_instance_location) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; TRACE("iface %p, index_count_per_instance %u, instance_count %u, start_vertex_location %u, " @@ -3442,10 +3432,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_DrawIndexedInstanced(ID3D12Grap instance_count, start_vertex_location, base_vertex_location, start_instance_location)); } -static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandList6 *iface, UINT x, UINT y, UINT z) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; TRACE("iface %p, x %u, y %u, z %u.\n", iface, x, y, z); @@ -3461,10 +3451,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_Dispatch(ID3D12GraphicsCommandL VK_CALL(vkCmdDispatch(list->vk_command_buffer, x, y, z)); } -static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *dst, UINT64 dst_offset, ID3D12Resource *src, UINT64 src_offset, UINT64 byte_count) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + 
struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; VkBufferCopy buffer_copy; @@ -3476,9 +3466,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyBufferRegion(ID3D12Graphics vk_procs = &list->device->vk_procs; dst_resource = unsafe_impl_from_ID3D12Resource(dst); - assert(d3d12_resource_is_buffer(dst_resource)); + VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); src_resource = unsafe_impl_from_ID3D12Resource(src); - assert(d3d12_resource_is_buffer(src_resource)); + VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); d3d12_command_list_track_resource_usage(list, dst_resource); d3d12_command_list_track_resource_usage(list, src_resource); @@ -3679,11 +3669,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com src_format->dxgi_format, src_format->vk_format, dst_format->dxgi_format, dst_format->vk_format); - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); - assert(!vkd3d_format_is_compressed(dst_format)); - assert(!vkd3d_format_is_compressed(src_format)); - assert(dst_format->byte_count == src_format->byte_count); + VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); + VKD3D_ASSERT(!vkd3d_format_is_compressed(dst_format)); + VKD3D_ASSERT(!vkd3d_format_is_compressed(src_format)); + VKD3D_ASSERT(dst_format->byte_count == src_format->byte_count); buffer_image_copy.bufferOffset = 0; buffer_image_copy.bufferRowLength = 0; @@ -3727,11 +3717,11 @@ static void d3d12_command_list_copy_incompatible_texture_region(struct d3d12_com buffer_image_copy.imageSubresource.layerCount = layer_count; dst_miplevel_idx = buffer_image_copy.imageSubresource.mipLevel; - assert(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == + VKD3D_ASSERT(d3d12_resource_desc_get_width(src_desc, src_miplevel_idx) == 
d3d12_resource_desc_get_width(dst_desc, dst_miplevel_idx)); - assert(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == + VKD3D_ASSERT(d3d12_resource_desc_get_height(src_desc, src_miplevel_idx) == d3d12_resource_desc_get_height(dst_desc, dst_miplevel_idx)); - assert(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == + VKD3D_ASSERT(d3d12_resource_desc_get_depth(src_desc, src_miplevel_idx) == d3d12_resource_desc_get_depth(dst_desc, dst_miplevel_idx)); VK_CALL(vkCmdCopyBufferToImage(list->vk_command_buffer, @@ -3746,11 +3736,11 @@ static bool validate_d3d12_box(const D3D12_BOX *box) && box->back > box->front; } -static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12GraphicsCommandList6 *iface, const D3D12_TEXTURE_COPY_LOCATION *dst, UINT dst_x, UINT dst_y, UINT dst_z, const D3D12_TEXTURE_COPY_LOCATION *src, const D3D12_BOX *src_box) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_format *src_format, *dst_format; const struct vkd3d_vk_device_procs *vk_procs; @@ -3779,8 +3769,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX && dst->Type == D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT) { - assert(d3d12_resource_is_buffer(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); + VKD3D_ASSERT(d3d12_resource_is_buffer(dst_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); if (!(dst_format = vkd3d_format_from_d3d12_resource_desc(list->device, &src_resource->desc, dst->u.PlacedFootprint.Footprint.Format))) @@ -3808,8 +3798,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic else if (src->Type == 
D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) { - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_buffer(src_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); + VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); if (!(src_format = vkd3d_format_from_d3d12_resource_desc(list->device, &dst_resource->desc, src->u.PlacedFootprint.Footprint.Format))) @@ -3837,8 +3827,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic else if (src->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX && dst->Type == D3D12_TEXTURE_COPY_TYPE_SUBRESOURCE_INDEX) { - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); dst_format = dst_resource->format; src_format = src_resource->format; @@ -3871,10 +3861,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTextureRegion(ID3D12Graphic } } -static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *dst, ID3D12Resource *src) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_format *dst_format, *src_format; const struct vkd3d_vk_device_procs *vk_procs; @@ -3897,8 +3887,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm if (d3d12_resource_is_buffer(dst_resource)) { - assert(d3d12_resource_is_buffer(src_resource)); - assert(src_resource->desc.Width == dst_resource->desc.Width); + VKD3D_ASSERT(d3d12_resource_is_buffer(src_resource)); + VKD3D_ASSERT(src_resource->desc.Width == 
dst_resource->desc.Width); vk_buffer_copy.srcOffset = 0; vk_buffer_copy.dstOffset = 0; @@ -3912,10 +3902,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm dst_format = dst_resource->format; src_format = src_resource->format; - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); - assert(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); - assert(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); + VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); + VKD3D_ASSERT(dst_resource->desc.MipLevels == src_resource->desc.MipLevels); + VKD3D_ASSERT(layer_count == d3d12_resource_desc_get_layer_count(&src_resource->desc)); if (src_format->vk_aspect_mask != dst_format->vk_aspect_mask) { @@ -3941,7 +3931,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyResource(ID3D12GraphicsComm } } -static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *tiled_resource, const D3D12_TILED_RESOURCE_COORDINATE *tile_region_start_coordinate, const D3D12_TILE_REGION_SIZE *tile_region_size, ID3D12Resource *buffer, UINT64 buffer_offset, D3D12_TILE_COPY_FLAGS flags) @@ -3952,11 +3942,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_CopyTiles(ID3D12GraphicsCommand buffer, buffer_offset, flags); } -static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *dst, UINT dst_sub_resource_idx, ID3D12Resource *src, UINT src_sub_resource_idx, DXGI_FORMAT format) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_format *src_format, *dst_format, *vk_format; struct d3d12_resource *dst_resource, *src_resource; const struct vkd3d_vk_device_procs *vk_procs; @@ -3972,8 +3962,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi dst_resource = unsafe_impl_from_ID3D12Resource(dst); src_resource = unsafe_impl_from_ID3D12Resource(src); - assert(d3d12_resource_is_texture(dst_resource)); - assert(d3d12_resource_is_texture(src_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(dst_resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(src_resource)); d3d12_command_list_track_resource_usage(list, dst_resource); d3d12_command_list_track_resource_usage(list, src_resource); @@ -4019,10 +4009,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresource(ID3D12Graphi VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &vk_image_resolve)); } -static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12GraphicsCommandList6 *iface, D3D12_PRIMITIVE_TOPOLOGY topology) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, topology %#x.\n", iface, topology); @@ -4033,11 +4023,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetPrimitiveTopology(ID3D12Gr d3d12_command_list_invalidate_current_pipeline(list); } -static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCommandList6 *iface, UINT viewport_count, const D3D12_VIEWPORT *viewports) { VkViewport vk_viewports[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list 
*list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; unsigned int i; @@ -4071,10 +4061,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetViewports(ID3D12GraphicsCo VK_CALL(vkCmdSetViewport(list->vk_command_buffer, 0, viewport_count, vk_viewports)); } -static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12GraphicsCommandList6 *iface, UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); VkRect2D vk_rects[D3D12_VIEWPORT_AND_SCISSORRECT_OBJECT_COUNT_PER_PIPELINE]; const struct vkd3d_vk_device_procs *vk_procs; unsigned int i; @@ -4099,10 +4089,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_RSSetScissorRects(ID3D12Graphic VK_CALL(vkCmdSetScissor(list->vk_command_buffer, 0, rect_count, vk_rects)); } -static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12GraphicsCommandList6 *iface, const FLOAT blend_factor[4]) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; TRACE("iface %p, blend_factor %p.\n", iface, blend_factor); @@ -4111,10 +4101,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetBlendFactor(ID3D12Graphics VK_CALL(vkCmdSetBlendConstants(list->vk_command_buffer, blend_factor)); } -static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsCommandList6 *iface, UINT stencil_ref) { - struct d3d12_command_list *list = 
impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; TRACE("iface %p, stencil_ref %u.\n", iface, stencil_ref); @@ -4123,11 +4113,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_OMSetStencilRef(ID3D12GraphicsC VK_CALL(vkCmdSetStencilReference(list->vk_command_buffer, VK_STENCIL_FRONT_AND_BACK, stencil_ref)); } -static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState(ID3D12GraphicsCommandList6 *iface, ID3D12PipelineState *pipeline_state) { struct d3d12_pipeline_state *state = unsafe_impl_from_ID3D12PipelineState(pipeline_state); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, pipeline_state %p.\n", iface, pipeline_state); @@ -4178,10 +4168,10 @@ static unsigned int d3d12_find_ds_multiplanar_transition(const D3D12_RESOURCE_BA return 0; } -static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsCommandList6 *iface, UINT barrier_count, const D3D12_RESOURCE_BARRIER *barriers) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); bool have_aliasing_barriers = false, have_split_barriers = false; const struct vkd3d_vk_device_procs *vk_procs; const struct vkd3d_vulkan_info *vk_info; @@ -4277,13 +4267,15 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC } if (!vk_barrier_parameters_from_d3d12_resource_state(state_before, stencil_state_before, - resource, list->vk_queue_flags, vk_info, &src_access_mask, &src_stage_mask, &layout_before)) + resource, 
list->vk_queue_flags, vk_info, &src_access_mask, + &src_stage_mask, &layout_before, list->device)) { FIXME("Unhandled state %#x.\n", state_before); continue; } if (!vk_barrier_parameters_from_d3d12_resource_state(state_after, stencil_state_after, - resource, list->vk_queue_flags, vk_info, &dst_access_mask, &dst_stage_mask, &layout_after)) + resource, list->vk_queue_flags, vk_info, &dst_access_mask, + &dst_stage_mask, &layout_after, list->device)) { FIXME("Unhandled state %#x.\n", state_after); continue; @@ -4303,7 +4295,8 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC resource = unsafe_impl_from_ID3D12Resource(uav->pResource); vk_barrier_parameters_from_d3d12_resource_state(D3D12_RESOURCE_STATE_UNORDERED_ACCESS, 0, - resource, list->vk_queue_flags, vk_info, &access_mask, &stage_mask, &image_layout); + resource, list->vk_queue_flags, vk_info, &access_mask, + &stage_mask, &image_layout, list->device); src_access_mask = dst_access_mask = access_mask; src_stage_mask = dst_stage_mask = stage_mask; layout_before = layout_after = image_layout; @@ -4404,13 +4397,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResourceBarrier(ID3D12GraphicsC WARN("Issuing split barrier(s) on D3D12_RESOURCE_BARRIER_FLAG_END_ONLY.\n"); } -static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteBundle(ID3D12GraphicsCommandList6 *iface, ID3D12GraphicsCommandList *command_list) { FIXME("iface %p, command_list %p stub!\n", iface, command_list); } -static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetDescriptorHeaps(ID3D12GraphicsCommandList6 *iface, UINT heap_count, ID3D12DescriptorHeap *const *heaps) { TRACE("iface %p, heap_count %u, heaps %p.\n", iface, heap_count, heaps); @@ -4436,10 +4429,10 @@ static void d3d12_command_list_set_root_signature(struct 
d3d12_command_list *lis d3d12_command_list_invalidate_root_parameters(list, bind_point); } -static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12GraphicsCommandList6 *iface, ID3D12RootSignature *root_signature) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_signature %p.\n", iface, root_signature); @@ -4447,10 +4440,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootSignature(ID3D12G unsafe_impl_from_ID3D12RootSignature(root_signature)); } -static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootSignature(ID3D12GraphicsCommandList6 *iface, ID3D12RootSignature *root_signature) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_signature %p.\n", iface, root_signature); @@ -4466,9 +4459,9 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l struct d3d12_descriptor_heap *descriptor_heap; struct d3d12_desc *desc; - assert(root_signature_get_descriptor_table(root_signature, index)); + VKD3D_ASSERT(root_signature_get_descriptor_table(root_signature, index)); - assert(index < ARRAY_SIZE(bindings->descriptor_tables)); + VKD3D_ASSERT(index < ARRAY_SIZE(bindings->descriptor_tables)); desc = d3d12_desc_from_gpu_handle(base_descriptor); if (bindings->descriptor_tables[index] == desc) @@ -4489,10 +4482,10 @@ static void d3d12_command_list_set_descriptor_table(struct d3d12_command_list *l bindings->descriptor_table_active_mask |= (uint64_t)1 << index; } -static void STDMETHODCALLTYPE 
d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", iface, root_parameter_index, debug_gpu_handle(base_descriptor)); @@ -4501,10 +4494,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootDescriptorTable(I root_parameter_index, base_descriptor); } -static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootDescriptorTable(ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_DESCRIPTOR_HANDLE base_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, base_descriptor %s.\n", iface, root_parameter_index, debug_gpu_handle(base_descriptor)); @@ -4526,10 +4519,10 @@ static void d3d12_command_list_set_root_constants(struct d3d12_command_list *lis c->stage_flags, c->offset + offset * sizeof(uint32_t), count * sizeof(uint32_t), data)); } -static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, data 
0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); @@ -4538,10 +4531,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstant(ID3 root_parameter_index, dst_offset, 1, &data); } -static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, UINT data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, data 0x%08x, dst_offset %u.\n", iface, root_parameter_index, data, dst_offset); @@ -4550,10 +4543,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstant(ID root_parameter_index, dst_offset, 1, &data); } -static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, UINT constant_count, const void *data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); @@ -4562,10 +4555,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRoot32BitConstants(ID root_parameter_index, dst_offset, constant_count, data); } -static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRoot32BitConstants(ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, 
UINT constant_count, const void *data, UINT dst_offset) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, constant_count %u, data %p, dst_offset %u.\n", iface, root_parameter_index, constant_count, data, dst_offset); @@ -4587,7 +4580,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, struct d3d12_resource *resource; root_parameter = root_signature_get_root_descriptor(root_signature, index); - assert(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); + VKD3D_ASSERT(root_parameter->parameter_type == D3D12_ROOT_PARAMETER_TYPE_CBV); if (gpu_address) { @@ -4618,7 +4611,7 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, root_parameter, bindings->descriptor_sets[0], NULL, &buffer_info); VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - assert(index < ARRAY_SIZE(bindings->push_descriptors)); + VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); bindings->push_descriptors[index].u.cbv.vk_buffer = buffer_info.buffer; bindings->push_descriptors[index].u.cbv.offset = buffer_info.offset; bindings->push_descriptor_dirty_mask |= 1u << index; @@ -4627,9 +4620,9 @@ static void d3d12_command_list_set_root_cbv(struct d3d12_command_list *list, } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferView( - ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4638,9 +4631,9 @@ 
static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootConstantBufferVie } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootConstantBufferView( - ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4661,7 +4654,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li VkBufferView vk_buffer_view; root_parameter = root_signature_get_root_descriptor(root_signature, index); - assert(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); + VKD3D_ASSERT(root_parameter->parameter_type != D3D12_ROOT_PARAMETER_TYPE_CBV); /* FIXME: Re-use buffer views. 
*/ if (!vkd3d_create_raw_buffer_view(list->device, gpu_address, root_parameter->parameter_type, &vk_buffer_view)) @@ -4691,7 +4684,7 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li root_parameter, bindings->descriptor_sets[0], &vk_buffer_view, NULL); VK_CALL(vkUpdateDescriptorSets(list->device->vk_device, 1, &descriptor_write, 0, NULL)); - assert(index < ARRAY_SIZE(bindings->push_descriptors)); + VKD3D_ASSERT(index < ARRAY_SIZE(bindings->push_descriptors)); bindings->push_descriptors[index].u.vk_buffer_view = vk_buffer_view; bindings->push_descriptor_dirty_mask |= 1u << index; bindings->push_descriptor_active_mask |= 1u << index; @@ -4699,9 +4692,9 @@ static void d3d12_command_list_set_root_descriptor(struct d3d12_command_list *li } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceView( - ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4711,9 +4704,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootShaderResourceVie } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceView( - ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4723,9 
+4716,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootShaderResourceVi } static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessView( - ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4735,9 +4728,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetComputeRootUnorderedAccessVi } static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessView( - ID3D12GraphicsCommandList5 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) + ID3D12GraphicsCommandList6 *iface, UINT root_parameter_index, D3D12_GPU_VIRTUAL_ADDRESS address) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); TRACE("iface %p, root_parameter_index %u, address %#"PRIx64".\n", iface, root_parameter_index, address); @@ -4746,10 +4739,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetGraphicsRootUnorderedAccessV root_parameter_index, address); } -static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12GraphicsCommandList6 *iface, const D3D12_INDEX_BUFFER_VIEW *view) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_vk_device_procs *vk_procs; struct d3d12_resource *resource; enum VkIndexType index_type; @@ -4789,10 +4782,10 @@ static void 
STDMETHODCALLTYPE d3d12_command_list_IASetIndexBuffer(ID3D12Graphics view->BufferLocation - resource->gpu_address, index_type)); } -static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12GraphicsCommandList6 *iface, UINT start_slot, UINT view_count, const D3D12_VERTEX_BUFFER_VIEW *views) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct vkd3d_null_resources *null_resources; struct vkd3d_gpu_va_allocator *gpu_va_allocator; VkDeviceSize offsets[ARRAY_SIZE(list->strides)]; @@ -4814,15 +4807,9 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi return; } - if (!views) - { - WARN("NULL \"views\" pointer specified.\n"); - return; - } - for (i = 0; i < view_count; ++i) { - if (views[i].BufferLocation) + if (views && views[i].BufferLocation) { resource = vkd3d_gpu_va_allocator_dereference(gpu_va_allocator, views[i].BufferLocation); buffers[i] = resource->u.vk_buffer; @@ -4847,10 +4834,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi d3d12_command_list_invalidate_current_pipeline(list); } -static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SOSetTargets(ID3D12GraphicsCommandList6 *iface, UINT start_slot, UINT view_count, const D3D12_STREAM_OUTPUT_BUFFER_VIEW *views) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); VkDeviceSize offsets[ARRAY_SIZE(list->so_counter_buffers)]; VkDeviceSize sizes[ARRAY_SIZE(list->so_counter_buffers)]; VkBuffer buffers[ARRAY_SIZE(list->so_counter_buffers)]; @@ -4912,11 +4899,11 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_SOSetTargets(ID3D12GraphicsComm VK_CALL(vkCmdBindTransformFeedbackBuffersEXT(list->vk_command_buffer, first, count, buffers, offsets, sizes)); } -static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetRenderTargets(ID3D12GraphicsCommandList6 *iface, UINT render_target_descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE *render_target_descriptors, BOOL single_descriptor_handle, const D3D12_CPU_DESCRIPTOR_HANDLE *depth_stencil_descriptor) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct d3d12_rtv_desc *rtv_desc; const struct d3d12_dsv_desc *dsv_desc; VkFormat prev_dsv_format; @@ -5117,12 +5104,12 @@ static void d3d12_command_list_clear(struct d3d12_command_list *list, } } -static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12GraphicsCommandList6 *iface, D3D12_CPU_DESCRIPTOR_HANDLE dsv, D3D12_CLEAR_FLAGS flags, float depth, UINT8 stencil, UINT rect_count, const D3D12_RECT *rects) { const union VkClearValue clear_value = {.depthStencil = {depth, stencil}}; - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct d3d12_dsv_desc *dsv_desc = d3d12_dsv_desc_from_cpu_handle(dsv); struct VkAttachmentDescription attachment_desc; struct VkAttachmentReference ds_reference; @@ -5166,10 +5153,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearDepthStencilView(ID3D12Gra &clear_value, rect_count, rects); } -static void STDMETHODCALLTYPE d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE 
d3d12_command_list_ClearRenderTargetView(ID3D12GraphicsCommandList6 *iface, D3D12_CPU_DESCRIPTOR_HANDLE rtv, const FLOAT color[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const struct d3d12_rtv_desc *rtv_desc = d3d12_rtv_desc_from_cpu_handle(rtv); struct VkAttachmentDescription attachment_desc; struct VkAttachmentReference color_reference; @@ -5288,11 +5275,13 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, struct d3d12_resource *resource, struct vkd3d_view *descriptor, const VkClearColorValue *clear_colour, unsigned int rect_count, const D3D12_RECT *rects) { + const VkPhysicalDeviceLimits *device_limits = &list->device->vk_info.device_limits; const struct vkd3d_vk_device_procs *vk_procs = &list->device->vk_procs; unsigned int i, miplevel_idx, layer_count; struct vkd3d_uav_clear_pipeline pipeline; struct vkd3d_uav_clear_args clear_args; const struct vkd3d_resource_view *view; + uint32_t count_x, count_y, count_z; VkDescriptorImageInfo image_info; D3D12_RECT full_rect, curr_rect; VkWriteDescriptorSet write_set; @@ -5383,18 +5372,32 @@ static void d3d12_command_list_clear_uav(struct d3d12_command_list *list, if (curr_rect.left >= curr_rect.right || curr_rect.top >= curr_rect.bottom) continue; - clear_args.offset.x = curr_rect.left; clear_args.offset.y = curr_rect.top; - clear_args.extent.width = curr_rect.right - curr_rect.left; clear_args.extent.height = curr_rect.bottom - curr_rect.top; - VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, - VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); + count_y = vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height); + count_z = vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth); + if (count_y > device_limits->maxComputeWorkGroupCount[1]) + 
FIXME("Group Y count %u exceeds max %u.\n", count_y, device_limits->maxComputeWorkGroupCount[1]); + if (count_z > device_limits->maxComputeWorkGroupCount[2]) + FIXME("Group Z count %u exceeds max %u.\n", count_z, device_limits->maxComputeWorkGroupCount[2]); + + do + { + clear_args.offset.x = curr_rect.left; + clear_args.extent.width = curr_rect.right - curr_rect.left; + + count_x = vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width); + count_x = min(count_x, device_limits->maxComputeWorkGroupCount[0]); - VK_CALL(vkCmdDispatch(list->vk_command_buffer, - vkd3d_compute_workgroup_count(clear_args.extent.width, pipeline.group_size.width), - vkd3d_compute_workgroup_count(clear_args.extent.height, pipeline.group_size.height), - vkd3d_compute_workgroup_count(layer_count, pipeline.group_size.depth))); + VK_CALL(vkCmdPushConstants(list->vk_command_buffer, pipeline.vk_pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(clear_args), &clear_args)); + + VK_CALL(vkCmdDispatch(list->vk_command_buffer, count_x, count_y, count_z)); + + curr_rect.left += count_x * pipeline.group_size.width; + } + while (curr_rect.right > curr_rect.left); } } @@ -5434,15 +5437,59 @@ static const struct vkd3d_format *vkd3d_fixup_clear_uav_uint_colour(struct d3d12 } } -static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList5 *iface, +static struct vkd3d_view *create_uint_view(struct d3d12_device *device, const struct vkd3d_resource_view *view, + struct d3d12_resource *resource, VkClearColorValue *colour) +{ + struct vkd3d_texture_view_desc view_desc; + const struct vkd3d_format *uint_format; + struct vkd3d_view *uint_view; + + if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) + && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, colour))) + { + ERR("Unhandled format %#x.\n", view->format->dxgi_format); + return NULL; + } + + if 
(d3d12_resource_is_buffer(resource)) + { + if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource->u.vk_buffer, + uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) + { + ERR("Failed to create buffer view.\n"); + return NULL; + } + + return uint_view; + } + + memset(&view_desc, 0, sizeof(view_desc)); + view_desc.view_type = view->info.texture.vk_view_type; + view_desc.format = uint_format; + view_desc.miplevel_idx = view->info.texture.miplevel_idx; + view_desc.miplevel_count = 1; + view_desc.layer_idx = view->info.texture.layer_idx; + view_desc.layer_count = view->info.texture.layer_count; + view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; + view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; + + if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, + resource->u.vk_image, &view_desc, &uint_view)) + { + ERR("Failed to create image view.\n"); + return NULL; + } + + return uint_view; +} + +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID3D12GraphicsCommandList6 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const UINT values[4], UINT rect_count, const D3D12_RECT *rects) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct vkd3d_view *descriptor, *uint_view = NULL; struct d3d12_device *device = list->device; - struct vkd3d_texture_view_desc view_desc; - const struct vkd3d_format *uint_format; const struct vkd3d_resource_view *view; struct d3d12_resource *resource_impl; VkClearColorValue colour; @@ -5456,44 +5503,11 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID view = &descriptor->v; memcpy(colour.uint32, values, sizeof(colour.uint32)); - if (view->format->type != VKD3D_FORMAT_TYPE_UINT) + if (view->format->type != VKD3D_FORMAT_TYPE_UINT + && !(descriptor = 
uint_view = create_uint_view(device, view, resource_impl, &colour))) { - if (!(uint_format = vkd3d_find_uint_format(device, view->format->dxgi_format)) - && !(uint_format = vkd3d_fixup_clear_uav_uint_colour(device, view->format->dxgi_format, &colour))) - { - ERR("Unhandled format %#x.\n", view->format->dxgi_format); - return; - } - - if (d3d12_resource_is_buffer(resource_impl)) - { - if (!vkd3d_create_buffer_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_buffer, - uint_format, view->info.buffer.offset, view->info.buffer.size, &uint_view)) - { - ERR("Failed to create buffer view.\n"); - return; - } - } - else - { - memset(&view_desc, 0, sizeof(view_desc)); - view_desc.view_type = view->info.texture.vk_view_type; - view_desc.format = uint_format; - view_desc.miplevel_idx = view->info.texture.miplevel_idx; - view_desc.miplevel_count = 1; - view_desc.layer_idx = view->info.texture.layer_idx; - view_desc.layer_count = view->info.texture.layer_count; - view_desc.vk_image_aspect = VK_IMAGE_ASPECT_COLOR_BIT; - view_desc.usage = VK_IMAGE_USAGE_STORAGE_BIT; - - if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_UAV, resource_impl->u.vk_image, &view_desc, - &uint_view)) - { - ERR("Failed to create image view.\n"); - return; - } - } - descriptor = uint_view; + ERR("Failed to create UINT view.\n"); + return; } d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); @@ -5502,36 +5516,49 @@ static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewUint(ID vkd3d_view_decref(uint_view, device); } -static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ClearUnorderedAccessViewFloat(ID3D12GraphicsCommandList6 *iface, D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle, D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle, ID3D12Resource *resource, const float values[4], UINT rect_count, const D3D12_RECT *rects) { - struct 
d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); + struct vkd3d_view *descriptor, *uint_view = NULL; + struct d3d12_device *device = list->device; + const struct vkd3d_resource_view *view; struct d3d12_resource *resource_impl; VkClearColorValue colour; - struct vkd3d_view *view; TRACE("iface %p, gpu_handle %s, cpu_handle %s, resource %p, values %p, rect_count %u, rects %p.\n", iface, debug_gpu_handle(gpu_handle), debug_cpu_handle(cpu_handle), resource, values, rect_count, rects); resource_impl = unsafe_impl_from_ID3D12Resource(resource); - if (!(view = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) + if (!(descriptor = d3d12_desc_from_cpu_handle(cpu_handle)->s.u.view)) return; + view = &descriptor->v; memcpy(colour.float32, values, sizeof(colour.float32)); - d3d12_command_list_clear_uav(list, resource_impl, view, &colour, rect_count, rects); + if (view->format->type == VKD3D_FORMAT_TYPE_SINT + && !(descriptor = uint_view = create_uint_view(device, view, resource_impl, &colour))) + { + ERR("Failed to create UINT view.\n"); + return; + } + + d3d12_command_list_clear_uav(list, resource_impl, descriptor, &colour, rect_count, rects); + + if (uint_view) + vkd3d_view_decref(uint_view, device); } -static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DiscardResource(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *resource, const D3D12_DISCARD_REGION *region) { FIXME_ONCE("iface %p, resource %p, region %p stub!\n", iface, resource, region); } -static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsCommandList6 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + 
struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs; VkQueryControlFlags flags = 0; @@ -5558,10 +5585,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginQuery(ID3D12GraphicsComman VK_CALL(vkCmdBeginQuery(list->vk_command_buffer, query_heap->vk_query_pool, index, flags)); } -static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_EndQuery(ID3D12GraphicsCommandList6 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT index) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); const struct vkd3d_vk_device_procs *vk_procs; @@ -5603,12 +5630,12 @@ static size_t get_query_stride(D3D12_QUERY_TYPE type) return sizeof(uint64_t); } -static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12GraphicsCommandList6 *iface, ID3D12QueryHeap *heap, D3D12_QUERY_TYPE type, UINT start_index, UINT query_count, ID3D12Resource *dst_buffer, UINT64 aligned_dst_buffer_offset) { const struct d3d12_query_heap *query_heap = unsafe_impl_from_ID3D12QueryHeap(heap); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_resource *buffer = unsafe_impl_from_ID3D12Resource(dst_buffer); const struct vkd3d_vk_device_procs *vk_procs; unsigned int i, first, count; @@ -5684,10 +5711,10 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveQueryData(ID3D12Graphics } } -static void STDMETHODCALLTYPE 
d3d12_command_list_SetPredication(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *buffer, UINT64 aligned_buffer_offset, D3D12_PREDICATION_OP operation) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_resource *resource = unsafe_impl_from_ID3D12Resource(buffer); const struct vkd3d_vulkan_info *vk_info = &list->device->vk_info; const struct vkd3d_vk_device_procs *vk_procs; @@ -5756,19 +5783,19 @@ static void STDMETHODCALLTYPE d3d12_command_list_SetPredication(ID3D12GraphicsCo } } -static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetMarker(ID3D12GraphicsCommandList6 *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); } -static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BeginEvent(ID3D12GraphicsCommandList6 *iface, UINT metadata, const void *data, UINT size) { FIXME("iface %p, metadata %#x, data %p, size %u stub!\n", iface, metadata, data, size); } -static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList5 *iface) +static void STDMETHODCALLTYPE d3d12_command_list_EndEvent(ID3D12GraphicsCommandList6 *iface) { FIXME("iface %p stub!\n", iface); } @@ -5777,14 +5804,14 @@ STATIC_ASSERT(sizeof(VkDispatchIndirectCommand) == sizeof(D3D12_DISPATCH_ARGUMEN STATIC_ASSERT(sizeof(VkDrawIndexedIndirectCommand) == sizeof(D3D12_DRAW_INDEXED_ARGUMENTS)); STATIC_ASSERT(sizeof(VkDrawIndirectCommand) == sizeof(D3D12_DRAW_ARGUMENTS)); -static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList5 *iface, +static void 
STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsCommandList6 *iface, ID3D12CommandSignature *command_signature, UINT max_command_count, ID3D12Resource *arg_buffer, UINT64 arg_buffer_offset, ID3D12Resource *count_buffer, UINT64 count_buffer_offset) { struct d3d12_command_signature *sig_impl = unsafe_impl_from_ID3D12CommandSignature(command_signature); struct d3d12_resource *count_impl = unsafe_impl_from_ID3D12Resource(count_buffer); struct d3d12_resource *arg_impl = unsafe_impl_from_ID3D12Resource(arg_buffer); - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); const D3D12_COMMAND_SIGNATURE_DESC *signature_desc; const struct vkd3d_vk_device_procs *vk_procs; unsigned int i; @@ -5883,7 +5910,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_ExecuteIndirect(ID3D12GraphicsC d3d12_command_signature_decref(sig_impl); } -static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, @@ -5896,7 +5923,7 @@ static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT(ID3D12Grap dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); } -static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_AtomicCopyBufferUINT64(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *dst_buffer, UINT64 dst_offset, ID3D12Resource *src_buffer, UINT64 src_offset, UINT dependent_resource_count, ID3D12Resource * const *dependent_resources, @@ -5909,20 +5936,20 @@ static void STDMETHODCALLTYPE 
d3d12_command_list_AtomicCopyBufferUINT64(ID3D12Gr dependent_resource_count, dependent_resources, dependent_sub_resource_ranges); } -static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_OMSetDepthBounds(ID3D12GraphicsCommandList6 *iface, FLOAT min, FLOAT max) { FIXME("iface %p, min %.8e, max %.8e stub!\n", iface, min, max); } -static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetSamplePositions(ID3D12GraphicsCommandList6 *iface, UINT sample_count, UINT pixel_count, D3D12_SAMPLE_POSITION *sample_positions) { FIXME("iface %p, sample_count %u, pixel_count %u, sample_positions %p stub!\n", iface, sample_count, pixel_count, sample_positions); } -static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *dst_resource, UINT dst_sub_resource_idx, UINT dst_x, UINT dst_y, ID3D12Resource *src_resource, UINT src_sub_resource_idx, D3D12_RECT *src_rect, DXGI_FORMAT format, D3D12_RESOLVE_MODE mode) @@ -5934,16 +5961,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_ResolveSubresourceRegion(ID3D12 src_resource, src_sub_resource_idx, src_rect, format, mode); } -static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList5 *iface, UINT mask) +static void STDMETHODCALLTYPE d3d12_command_list_SetViewInstanceMask(ID3D12GraphicsCommandList6 *iface, UINT mask) { FIXME("iface %p, mask %#x stub!\n", iface, mask); } -static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12GraphicsCommandList6 *iface, UINT count, const D3D12_WRITEBUFFERIMMEDIATE_PARAMETER 
*parameters, const D3D12_WRITEBUFFERIMMEDIATE_MODE *modes) { - struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList5(iface); + struct d3d12_command_list *list = impl_from_ID3D12GraphicsCommandList6(iface); struct d3d12_resource *resource; unsigned int i; @@ -5956,13 +5983,13 @@ static void STDMETHODCALLTYPE d3d12_command_list_WriteBufferImmediate(ID3D12Grap } } -static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetProtectedResourceSession(ID3D12GraphicsCommandList6 *iface, ID3D12ProtectedResourceSession *protected_session) { FIXME("iface %p, protected_session %p stub!\n", iface, protected_session); } -static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsCommandList6 *iface, UINT count, const D3D12_RENDER_PASS_RENDER_TARGET_DESC *render_targets, const D3D12_RENDER_PASS_DEPTH_STENCIL_DESC *depth_stencil, D3D12_RENDER_PASS_FLAGS flags) { @@ -5970,74 +5997,78 @@ static void STDMETHODCALLTYPE d3d12_command_list_BeginRenderPass(ID3D12GraphicsC count, render_targets, depth_stencil, flags); } -static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList5 *iface) +static void STDMETHODCALLTYPE d3d12_command_list_EndRenderPass(ID3D12GraphicsCommandList6 *iface) { FIXME("iface %p stub!\n", iface); } -static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_InitializeMetaCommand(ID3D12GraphicsCommandList6 *iface, ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) { FIXME("iface %p, meta_command %p, parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface, meta_command, parameters_data, (uintptr_t)data_size_in_bytes); } -static void STDMETHODCALLTYPE 
d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_ExecuteMetaCommand(ID3D12GraphicsCommandList6 *iface, ID3D12MetaCommand *meta_command, const void *parameters_data, SIZE_T data_size_in_bytes) { FIXME("iface %p, meta_command %p, parameters_data %p, data_size_in_bytes %"PRIuPTR" stub!\n", iface, meta_command, parameters_data, (uintptr_t)data_size_in_bytes); } -static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_BuildRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface, const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_DESC *desc, UINT count, const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *postbuild_info_descs) { FIXME("iface %p, desc %p, count %u, postbuild_info_descs %p stub!\n", iface, desc, count, postbuild_info_descs); } -static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo(ID3D12GraphicsCommandList5 *iface, - const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, +static void STDMETHODCALLTYPE d3d12_command_list_EmitRaytracingAccelerationStructurePostbuildInfo( + ID3D12GraphicsCommandList6 *iface, const D3D12_RAYTRACING_ACCELERATION_STRUCTURE_POSTBUILD_INFO_DESC *desc, UINT structures_count, const D3D12_GPU_VIRTUAL_ADDRESS *src_structure_data) { FIXME("iface %p, desc %p, structures_count %u, src_structure_data %p stub!\n", iface, desc, structures_count, src_structure_data); } -static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList5 *iface, - D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data, - D3D12_GPU_VIRTUAL_ADDRESS src_structure_data, +static void STDMETHODCALLTYPE d3d12_command_list_CopyRaytracingAccelerationStructure(ID3D12GraphicsCommandList6 *iface, + D3D12_GPU_VIRTUAL_ADDRESS dst_structure_data, 
D3D12_GPU_VIRTUAL_ADDRESS src_structure_data, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_COPY_MODE mode) { FIXME("iface %p, dst_structure_data %#"PRIx64", src_structure_data %#"PRIx64", mode %u stub!\n", iface, dst_structure_data, src_structure_data, mode); } -static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_SetPipelineState1(ID3D12GraphicsCommandList6 *iface, ID3D12StateObject *state_object) { FIXME("iface %p, state_object %p stub!\n", iface, state_object); } -static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_DispatchRays(ID3D12GraphicsCommandList6 *iface, const D3D12_DISPATCH_RAYS_DESC *desc) { FIXME("iface %p, desc %p stub!\n", iface, desc); } -static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRate(ID3D12GraphicsCommandList6 *iface, D3D12_SHADING_RATE rate, const D3D12_SHADING_RATE_COMBINER *combiners) { FIXME("iface %p, rate %#x, combiners %p stub!\n", iface, rate, combiners); } -static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList5 *iface, +static void STDMETHODCALLTYPE d3d12_command_list_RSSetShadingRateImage(ID3D12GraphicsCommandList6 *iface, ID3D12Resource *rate_image) { FIXME("iface %p, rate_image %p stub!\n", iface, rate_image); } -static const struct ID3D12GraphicsCommandList5Vtbl d3d12_command_list_vtbl = +static void STDMETHODCALLTYPE d3d12_command_list_DispatchMesh(ID3D12GraphicsCommandList6 *iface, UINT x, UINT y, UINT z) +{ + FIXME("iface %p, x %u, y %u, z %u stub!\n", iface, x, y, z); +} + +static const struct ID3D12GraphicsCommandList6Vtbl d3d12_command_list_vtbl = { /* IUnknown methods */ d3d12_command_list_QueryInterface, @@ -6128,14 +6159,16 @@ static const struct ID3D12GraphicsCommandList5Vtbl 
d3d12_command_list_vtbl = /* ID3D12GraphicsCommandList5 methods */ d3d12_command_list_RSSetShadingRate, d3d12_command_list_RSSetShadingRateImage, + /* ID3D12GraphicsCommandList6 methods */ + d3d12_command_list_DispatchMesh, }; static struct d3d12_command_list *unsafe_impl_from_ID3D12CommandList(ID3D12CommandList *iface) { if (!iface) return NULL; - assert(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); - return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList5_iface); + VKD3D_ASSERT(iface->lpVtbl == (struct ID3D12CommandListVtbl *)&d3d12_command_list_vtbl); + return CONTAINING_RECORD(iface, struct d3d12_command_list, ID3D12GraphicsCommandList6_iface); } static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d3d12_device *device, @@ -6144,7 +6177,7 @@ static HRESULT d3d12_command_list_init(struct d3d12_command_list *list, struct d { HRESULT hr; - list->ID3D12GraphicsCommandList5_iface.lpVtbl = &d3d12_command_list_vtbl; + list->ID3D12GraphicsCommandList6_iface.lpVtbl = &d3d12_command_list_vtbl; list->refcount = 1; list->type = type; @@ -6748,7 +6781,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu } vk_semaphore = fence->timeline_semaphore; - assert(vk_semaphore); + VKD3D_ASSERT(vk_semaphore); } else { @@ -6821,7 +6854,7 @@ static HRESULT d3d12_command_queue_signal(struct d3d12_command_queue *command_qu return hr; vk_semaphore = fence->timeline_semaphore; - assert(vk_semaphore); + VKD3D_ASSERT(vk_semaphore); return vkd3d_enqueue_timeline_semaphore(&command_queue->fence_worker, vk_semaphore, fence, timeline_value, vkd3d_queue); @@ -6990,7 +7023,7 @@ static HRESULT d3d12_command_queue_wait_locked(struct d3d12_command_queue *comma * until we have submitted, so the semaphore cannot be destroyed before the call to vkQueueSubmit. 
*/ vkd3d_mutex_unlock(&fence->mutex); - assert(fence->timeline_semaphore); + VKD3D_ASSERT(fence->timeline_semaphore); timeline_submit_info.sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO_KHR; timeline_submit_info.pNext = NULL; timeline_submit_info.waitSemaphoreValueCount = 1; @@ -7254,7 +7287,7 @@ static HRESULT d3d12_command_queue_flush_ops_locked(struct d3d12_command_queue * queue->is_flushing = true; - assert(queue->aux_op_queue.count == 0); + VKD3D_ASSERT(queue->aux_op_queue.count == 0); while (queue->op_queue.count != 0) { @@ -7544,7 +7577,7 @@ struct d3d12_command_signature *unsafe_impl_from_ID3D12CommandSignature(ID3D12Co { if (!iface) return NULL; - assert(iface->lpVtbl == &d3d12_command_signature_vtbl); + VKD3D_ASSERT(iface->lpVtbl == &d3d12_command_signature_vtbl); return CONTAINING_RECORD(iface, struct d3d12_command_signature, ID3D12CommandSignature_iface); } diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index cfc9c5f5ed3..01841c89692 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -76,6 +76,14 @@ static const char * const required_device_extensions[] = VK_KHR_SHADER_DRAW_PARAMETERS_EXTENSION_NAME, }; +/* In general we don't want to enable Vulkan beta extensions, but make an + * exception for VK_KHR_portability_subset because we draw no real feature from + * it, but it's still useful to be able to develop for MoltenVK without being + * spammed with validation errors. 
*/ +#ifndef VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME +#define VK_KHR_PORTABILITY_SUBSET_EXTENSION_NAME "VK_KHR_portability_subset" +#endif + static const struct vkd3d_optional_extension_info optional_device_extensions[] = { /* KHR extensions */ @@ -85,6 +93,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(KHR_IMAGE_FORMAT_LIST, KHR_image_format_list), VK_EXTENSION(KHR_MAINTENANCE2, KHR_maintenance2), VK_EXTENSION(KHR_MAINTENANCE3, KHR_maintenance3), + VK_EXTENSION(KHR_PORTABILITY_SUBSET, KHR_portability_subset), VK_EXTENSION(KHR_PUSH_DESCRIPTOR, KHR_push_descriptor), VK_EXTENSION(KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE, KHR_sampler_mirror_clamp_to_edge), VK_EXTENSION(KHR_TIMELINE_SEMAPHORE, KHR_timeline_semaphore), @@ -92,7 +101,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_4444_FORMATS, EXT_4444_formats), VK_EXTENSION(EXT_CALIBRATED_TIMESTAMPS, EXT_calibrated_timestamps), VK_EXTENSION(EXT_CONDITIONAL_RENDERING, EXT_conditional_rendering), - VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), + VK_DEBUG_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), @@ -299,7 +308,7 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio for (i = 0; i < required_extension_count; ++i) { if (!has_extension(extensions, count, required_extensions[i])) - ERR("Required %s extension %s is not supported.\n", + WARN("Required %s extension %s is not supported.\n", extension_type, debugstr_a(required_extensions[i])); ++extension_count; } @@ -327,12 +336,12 @@ static unsigned int vkd3d_check_extensions(const VkExtensionProperties *extensio for (i = 0; i < user_extension_count; ++i) { if (!has_extension(extensions, count, user_extensions[i])) - ERR("Required user %s 
extension %s is not supported.\n", + WARN("Required user %s extension %s is not supported.\n", extension_type, debugstr_a(user_extensions[i])); ++extension_count; } - assert(!optional_user_extension_count || user_extension_supported); + VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported); for (i = 0; i < optional_user_extension_count; ++i) { if (has_extension(extensions, count, optional_user_extensions[i])) @@ -394,7 +403,7 @@ static unsigned int vkd3d_enable_extensions(const char *extensions[], { extension_count = vkd3d_append_extension(extensions, extension_count, user_extensions[i]); } - assert(!optional_user_extension_count || user_extension_supported); + VKD3D_ASSERT(!optional_user_extension_count || user_extension_supported); for (i = 0; i < optional_user_extension_count; ++i) { if (!user_extension_supported[i]) @@ -575,7 +584,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, if (!create_info->pfn_signal_event) { - ERR("Invalid signal event function pointer.\n"); + WARN("Invalid signal event function pointer.\n"); return E_INVALIDARG; } if (!create_info->pfn_create_thread != !create_info->pfn_join_thread) @@ -585,7 +594,7 @@ static HRESULT vkd3d_instance_init(struct vkd3d_instance *instance, } if (create_info->wchar_size != 2 && create_info->wchar_size != 4) { - ERR("Unexpected WCHAR size %zu.\n", create_info->wchar_size); + WARN("Unexpected WCHAR size %zu.\n", create_info->wchar_size); return E_INVALIDARG; } @@ -822,114 +831,90 @@ struct vkd3d_physical_device_info VkPhysicalDeviceFeatures2 features2; }; -static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) +static void vkd3d_chain_physical_device_info_structures(struct vkd3d_physical_device_info *info, + struct d3d12_device *device) { - const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; - VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; - 
VkPhysicalDeviceDescriptorIndexingPropertiesEXT *descriptor_indexing_properties; - VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; - VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; - VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; - VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; - VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; - VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; - VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; - VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; - VkPhysicalDeviceTimelineSemaphoreFeaturesKHR *timeline_semaphore_features; - VkPhysicalDeviceMutableDescriptorTypeFeaturesEXT *mutable_features; - VkPhysicalDeviceDepthClipEnableFeaturesEXT *depth_clip_features; - VkPhysicalDeviceMaintenance3Properties *maintenance3_properties; - VkPhysicalDeviceTransformFeedbackPropertiesEXT *xfb_properties; - VkPhysicalDevice physical_device = device->vk_physical_device; - VkPhysicalDevice4444FormatsFeaturesEXT *formats4444_features; - VkPhysicalDeviceTransformFeedbackFeaturesEXT *xfb_features; struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; - VkPhysicalDeviceSubgroupProperties *subgroup_properties; - - memset(info, 0, sizeof(*info)); - conditional_rendering_features = &info->conditional_rendering_features; - depth_clip_features = &info->depth_clip_features; - descriptor_indexing_features = &info->descriptor_indexing_features; - fragment_shader_interlock_features = &info->fragment_shader_interlock_features; - robustness2_features = &info->robustness2_features; - descriptor_indexing_properties = &info->descriptor_indexing_properties; - maintenance3_properties = &info->maintenance3_properties; - demote_features = &info->demote_features; - buffer_alignment_features = &info->texel_buffer_alignment_features; - 
buffer_alignment_properties = &info->texel_buffer_alignment_properties; - vertex_divisor_features = &info->vertex_divisor_features; - vertex_divisor_properties = &info->vertex_divisor_properties; - timeline_semaphore_features = &info->timeline_semaphore_features; - mutable_features = &info->mutable_features; - formats4444_features = &info->formats4444_features; - xfb_features = &info->xfb_features; - xfb_properties = &info->xfb_properties; - subgroup_properties = &info->subgroup_properties; - info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + info->features2.pNext = NULL; - conditional_rendering_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; if (vulkan_info->EXT_conditional_rendering) - vk_prepend_struct(&info->features2, conditional_rendering_features); - depth_clip_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->conditional_rendering_features); if (vulkan_info->EXT_depth_clip_enable) - vk_prepend_struct(&info->features2, depth_clip_features); - descriptor_indexing_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->depth_clip_features); if (vulkan_info->EXT_descriptor_indexing) - vk_prepend_struct(&info->features2, descriptor_indexing_features); - fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->descriptor_indexing_features); if (vulkan_info->EXT_fragment_shader_interlock) - vk_prepend_struct(&info->features2, fragment_shader_interlock_features); - robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->fragment_shader_interlock_features); if (vulkan_info->EXT_robustness2) - vk_prepend_struct(&info->features2, robustness2_features); - 
demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->robustness2_features); if (vulkan_info->EXT_shader_demote_to_helper_invocation) - vk_prepend_struct(&info->features2, demote_features); - buffer_alignment_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->demote_features); if (vulkan_info->EXT_texel_buffer_alignment) - vk_prepend_struct(&info->features2, buffer_alignment_features); - xfb_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->texel_buffer_alignment_features); if (vulkan_info->EXT_transform_feedback) - vk_prepend_struct(&info->features2, xfb_features); - vertex_divisor_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->xfb_features); if (vulkan_info->EXT_vertex_attribute_divisor) - vk_prepend_struct(&info->features2, vertex_divisor_features); - timeline_semaphore_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; + vk_prepend_struct(&info->features2, &info->vertex_divisor_features); if (vulkan_info->KHR_timeline_semaphore) - vk_prepend_struct(&info->features2, timeline_semaphore_features); - mutable_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->timeline_semaphore_features); if (vulkan_info->EXT_mutable_descriptor_type) - vk_prepend_struct(&info->features2, mutable_features); - formats4444_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; + vk_prepend_struct(&info->features2, &info->mutable_features); if (vulkan_info->EXT_4444_formats) - vk_prepend_struct(&info->features2, formats4444_features); - - if (vulkan_info->KHR_get_physical_device_properties2) - 
VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); - else - VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); + vk_prepend_struct(&info->features2, &info->formats4444_features); - info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + info->properties2.pNext = NULL; - maintenance3_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; if (vulkan_info->KHR_maintenance3) - vk_prepend_struct(&info->properties2, maintenance3_properties); - descriptor_indexing_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, &info->maintenance3_properties); if (vulkan_info->EXT_descriptor_indexing) - vk_prepend_struct(&info->properties2, descriptor_indexing_properties); - buffer_alignment_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, &info->descriptor_indexing_properties); if (vulkan_info->EXT_texel_buffer_alignment) - vk_prepend_struct(&info->properties2, buffer_alignment_properties); - xfb_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, &info->texel_buffer_alignment_properties); if (vulkan_info->EXT_transform_feedback) - vk_prepend_struct(&info->properties2, xfb_properties); - vertex_divisor_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; + vk_prepend_struct(&info->properties2, &info->xfb_properties); if (vulkan_info->EXT_vertex_attribute_divisor) - vk_prepend_struct(&info->properties2, vertex_divisor_properties); - subgroup_properties->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + vk_prepend_struct(&info->properties2, &info->vertex_divisor_properties); if (d3d12_device_environment_is_vulkan_min_1_1(device)) - vk_prepend_struct(&info->properties2, subgroup_properties); + 
vk_prepend_struct(&info->properties2, &info->subgroup_properties); +} + +static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *info, struct d3d12_device *device) +{ + const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; + VkPhysicalDevice physical_device = device->vk_physical_device; + struct vkd3d_vulkan_info *vulkan_info = &device->vk_info; + + memset(info, 0, sizeof(*info)); + + info->features2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2; + info->conditional_rendering_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT; + info->depth_clip_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT; + info->descriptor_indexing_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; + info->fragment_shader_interlock_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; + info->robustness2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; + info->demote_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; + info->texel_buffer_alignment_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT; + info->xfb_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT; + info->vertex_divisor_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT; + info->timeline_semaphore_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES_KHR; + info->mutable_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_EXT; + info->formats4444_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT; + + info->properties2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2; + info->maintenance3_properties.sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES; + info->descriptor_indexing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES_EXT; + info->texel_buffer_alignment_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT; + info->xfb_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT; + info->vertex_divisor_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT; + info->subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES; + + vkd3d_chain_physical_device_info_structures(info, device); + + if (vulkan_info->KHR_get_physical_device_properties2) + VK_CALL(vkGetPhysicalDeviceFeatures2KHR(physical_device, &info->features2)); + else + VK_CALL(vkGetPhysicalDeviceFeatures(physical_device, &info->features2.features)); if (vulkan_info->KHR_get_physical_device_properties2) VK_CALL(vkGetPhysicalDeviceProperties2KHR(physical_device, &info->properties2)); @@ -1522,7 +1507,7 @@ static bool d3d12_device_supports_typed_uav_load_additional_formats(const struct for (i = 0; i < ARRAY_SIZE(additional_formats); ++i) { format = vkd3d_get_format(device, additional_formats[i], false); - assert(format); + VKD3D_ASSERT(format); VK_CALL(vkGetPhysicalDeviceFormatProperties(device->vk_physical_device, format->vk_format, &properties)); if (!((properties.linearTilingFeatures | properties.optimalTilingFeatures) & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) @@ -1634,6 +1619,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->device_limits = physical_device_info->properties2.properties.limits; vulkan_info->sparse_properties = physical_device_info->properties2.properties.sparseProperties; + vulkan_info->geometry_shaders = physical_device_info->features2.features.geometryShader; + vulkan_info->tessellation_shaders = physical_device_info->features2.features.tessellationShader; 
vulkan_info->sparse_binding = features->sparseBinding; vulkan_info->sparse_residency_3d = features->sparseResidencyImage3D; vulkan_info->rasterization_stream = physical_device_info->xfb_properties.transformFeedbackRasterizationStreamSelect; @@ -1829,6 +1816,8 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vkd3d_device_descriptor_limits_init(&vulkan_info->descriptor_limits, &physical_device_info->properties2.properties.limits); + vkd3d_chain_physical_device_info_structures(physical_device_info, device); + return S_OK; } @@ -2166,7 +2155,7 @@ static HRESULT vkd3d_create_vk_device(struct d3d12_device *device, vkd3d_free(extensions); if (vr < 0) { - ERR("Failed to create Vulkan device, vr %d.\n", vr); + WARN("Failed to create Vulkan device, vr %d.\n", vr); return hresult_from_vk_result(vr); } @@ -2552,11 +2541,13 @@ static void device_init_descriptor_pool_sizes(struct d3d12_device *device) VKD3D_MAX_UAV_CLEAR_DESCRIPTORS_PER_TYPE); pool_sizes[1].type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE; pool_sizes[1].descriptorCount = pool_sizes[0].descriptorCount; - device->vk_pool_count = 2; + pool_sizes[2].type = VK_DESCRIPTOR_TYPE_SAMPLER; + pool_sizes[2].descriptorCount = min(limits->sampler_max_descriptors, D3D12_MAX_LIVE_STATIC_SAMPLERS); + device->vk_pool_count = 3; return; } - assert(ARRAY_SIZE(device->vk_pool_sizes) >= 6); + VKD3D_ASSERT(ARRAY_SIZE(device->vk_pool_sizes) >= 6); pool_sizes[0].type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER; pool_sizes[0].descriptorCount = min(limits->uniform_buffer_max_descriptors, VKD3D_MAX_VIRTUAL_HEAP_DESCRIPTORS_PER_TYPE); @@ -3128,8 +3119,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device9 *i initial_pipeline_state, &object))) return hr; - return return_interface(&object->ID3D12GraphicsCommandList5_iface, - &IID_ID3D12GraphicsCommandList5, riid, command_list); + return return_interface(&object->ID3D12GraphicsCommandList6_iface, + &IID_ID3D12GraphicsCommandList6, riid, command_list); } /* 
Direct3D feature levels restrict which formats can be optionally supported. */ @@ -3806,7 +3797,8 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device9 return E_INVALIDARG; } - data->UnalignedBlockTexturesSupported = FALSE; + /* Vulkan does not restrict block texture alignment. */ + data->UnalignedBlockTexturesSupported = TRUE; TRACE("Unaligned block texture support %#x.\n", data->UnalignedBlockTexturesSupported); return S_OK; @@ -5262,7 +5254,7 @@ struct d3d12_device *unsafe_impl_from_ID3D12Device9(ID3D12Device9 *iface) { if (!iface) return NULL; - assert(iface->lpVtbl == &d3d12_device_vtbl); + VKD3D_ASSERT(iface->lpVtbl == &d3d12_device_vtbl); return impl_from_ID3D12Device9(iface); } diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index c897d9f2c5a..6d6820d3752 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -312,7 +312,7 @@ static ULONG STDMETHODCALLTYPE d3d12_heap_AddRef(ID3D12Heap *iface) TRACE("%p increasing refcount to %u.\n", heap, refcount); - assert(!heap->is_private); + VKD3D_ASSERT(!heap->is_private); return refcount; } @@ -443,7 +443,7 @@ struct d3d12_heap *unsafe_impl_from_ID3D12Heap(ID3D12Heap *iface) { if (!iface) return NULL; - assert(iface->lpVtbl == &d3d12_heap_vtbl); + VKD3D_ASSERT(iface->lpVtbl == &d3d12_heap_vtbl); return impl_from_ID3D12Heap(iface); } @@ -950,8 +950,8 @@ HRESULT vkd3d_get_image_allocation_info(struct d3d12_device *device, bool tiled; HRESULT hr; - assert(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); - assert(d3d12_resource_validate_desc(desc, device) == S_OK); + VKD3D_ASSERT(desc->Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); + VKD3D_ASSERT(d3d12_resource_validate_desc(desc, device) == S_OK); if (!desc->MipLevels) { @@ -1044,7 +1044,7 @@ static bool d3d12_resource_validate_box(const struct d3d12_resource *resource, depth = d3d12_resource_desc_get_depth(&resource->desc, mip_level); vkd3d_format = resource->format; - 
assert(vkd3d_format); + VKD3D_ASSERT(vkd3d_format); width_mask = vkd3d_format->block_width - 1; height_mask = vkd3d_format->block_height - 1; @@ -1162,7 +1162,7 @@ static bool d3d12_resource_init_tiles(struct d3d12_resource *resource, struct d3 if (d3d12_resource_is_buffer(resource)) { - assert(subresource_count == 1); + VKD3D_ASSERT(subresource_count == 1); VK_CALL(vkGetBufferMemoryRequirements(device->vk_device, resource->u.vk_buffer, &requirements)); if (requirements.alignment > D3D12_TILED_RESOURCE_TILE_SIZE_IN_BYTES) @@ -1381,7 +1381,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_resource_GetDevice(ID3D12Resource2 *iface static void *d3d12_resource_get_map_ptr(struct d3d12_resource *resource) { - assert(resource->heap->map_ptr); + VKD3D_ASSERT(resource->heap->map_ptr); return (uint8_t *)resource->heap->map_ptr + resource->heap_offset; } @@ -1771,7 +1771,7 @@ struct d3d12_resource *unsafe_impl_from_ID3D12Resource(ID3D12Resource *iface) { if (!iface) return NULL; - assert(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl); + VKD3D_ASSERT(iface->lpVtbl == (ID3D12ResourceVtbl *)&d3d12_resource_vtbl); return impl_from_ID3D12Resource(iface); } @@ -1809,14 +1809,6 @@ static bool d3d12_resource_validate_texture_format(const D3D12_RESOURCE_DESC1 *d return false; } - if (align(desc->Width, format->block_width) != desc->Width - || align(desc->Height, format->block_height) != desc->Height) - { - WARN("Invalid size %"PRIu64"x%u for block compressed format %#x.\n", - desc->Width, desc->Height, desc->Format); - return false; - } - return true; } @@ -2173,7 +2165,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, if (heap_offset > heap->desc.SizeInBytes || requirements.size > heap->desc.SizeInBytes - heap_offset) { - ERR("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n", + WARN("Heap too small for the resource (offset %"PRIu64", resource size %"PRIu64", heap size %"PRIu64".\n", heap_offset, 
requirements.size, heap->desc.SizeInBytes); return E_INVALIDARG; } @@ -2192,7 +2184,7 @@ static HRESULT vkd3d_bind_heap_memory(struct d3d12_device *device, goto allocate_memory; } - /* Syncronisation is not required for binding, but vkMapMemory() may be called + /* Synchronisation is not required for binding, but vkMapMemory() may be called * from another thread and it requires exclusive access. */ vkd3d_mutex_lock(&heap->mutex); @@ -2414,7 +2406,7 @@ static struct vkd3d_view *vkd3d_view_create(uint32_t magic, VkDescriptorType vk_ { struct vkd3d_view *view; - assert(magic); + VKD3D_ASSERT(magic); if (!(view = vkd3d_desc_object_cache_get(&device->view_desc_cache))) { @@ -2544,7 +2536,7 @@ static void d3d12_desc_write_vk_heap_null_descriptor(struct d3d12_descriptor_hea writes->vk_descriptor_writes[i].pTexelBufferView = &writes->null_vk_buffer_view; break; default: - assert(false); + VKD3D_ASSERT(false); break; } if (++i < ARRAY_SIZE(writes->vk_descriptor_writes) - 1) @@ -2733,7 +2725,7 @@ void d3d12_desc_copy(struct d3d12_desc *dst, const struct d3d12_desc *src, struc { struct d3d12_desc tmp; - assert(dst != src); + VKD3D_ASSERT(dst != src); tmp.s.u.object = d3d12_desc_get_object_ref(src, device); descriptor_heap_write_atomic(dst_heap, dst, &tmp, device); @@ -2756,7 +2748,7 @@ static VkDeviceSize vkd3d_get_required_texel_buffer_alignment(const struct d3d12 if (properties->storageTexelBufferOffsetSingleTexelAlignment && properties->uniformTexelBufferOffsetSingleTexelAlignment) { - assert(!vkd3d_format_is_compressed(format)); + VKD3D_ASSERT(!vkd3d_format_is_compressed(format)); return min(format->byte_count, alignment); } @@ -2856,7 +2848,7 @@ static bool vkd3d_create_buffer_view_for_resource(struct d3d12_device *device, return false; } - assert(d3d12_resource_is_buffer(resource)); + VKD3D_ASSERT(d3d12_resource_is_buffer(resource)); return vkd3d_create_buffer_view(device, magic, resource->u.vk_buffer, format, offset * element_size, size * element_size, view); @@ 
-2987,7 +2979,7 @@ static VkComponentSwizzle swizzle_vk_component(const VkComponentMapping *compone break; } - assert(component != VK_COMPONENT_SWIZZLE_IDENTITY); + VKD3D_ASSERT(component != VK_COMPONENT_SWIZZLE_IDENTITY); return component; } @@ -3519,8 +3511,8 @@ static void vkd3d_create_buffer_uav(struct d3d12_desc *descriptor, struct d3d12_ { const struct vkd3d_format *format; - assert(d3d12_resource_is_buffer(counter_resource)); - assert(desc->u.Buffer.StructureByteStride); + VKD3D_ASSERT(d3d12_resource_is_buffer(counter_resource)); + VKD3D_ASSERT(desc->u.Buffer.StructureByteStride); format = vkd3d_get_format(device, DXGI_FORMAT_R32_UINT, false); if (!vkd3d_create_vk_buffer_view(device, counter_resource->u.vk_buffer, format, @@ -3640,7 +3632,7 @@ bool vkd3d_create_raw_buffer_view(struct d3d12_device *device, } resource = vkd3d_gpu_va_allocator_dereference(&device->gpu_va_allocator, gpu_address); - assert(d3d12_resource_is_buffer(resource)); + VKD3D_ASSERT(d3d12_resource_is_buffer(resource)); return vkd3d_create_vk_buffer_view(device, resource->u.vk_buffer, format, gpu_address - resource->gpu_address, VK_WHOLE_SIZE, vk_buffer_view); } @@ -3912,7 +3904,7 @@ void d3d12_rtv_desc_create_rtv(struct d3d12_rtv_desc *rtv_desc, struct d3d12_dev vkd3d_desc.layer_count = resource->desc.DepthOrArraySize; } - assert(d3d12_resource_is_texture(resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(resource)); if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_RTV, resource->u.vk_image, &vkd3d_desc, &view)) return; @@ -3998,7 +3990,7 @@ void d3d12_dsv_desc_create_dsv(struct d3d12_dsv_desc *dsv_desc, struct d3d12_dev } } - assert(d3d12_resource_is_texture(resource)); + VKD3D_ASSERT(d3d12_resource_is_texture(resource)); if (!vkd3d_create_texture_view(device, VKD3D_DESCRIPTOR_MAGIC_DSV, resource->u.vk_image, &vkd3d_desc, &view)) return; @@ -4357,7 +4349,11 @@ static HRESULT d3d12_descriptor_heap_init(struct d3d12_descriptor_heap *descript return hr; 
descriptor_heap->use_vk_heaps = device->use_vk_heaps && (desc->Flags & D3D12_DESCRIPTOR_HEAP_FLAG_SHADER_VISIBLE); - d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc); + if (FAILED(hr = d3d12_descriptor_heap_vk_descriptor_sets_init(descriptor_heap, device, desc))) + { + vkd3d_private_store_destroy(&descriptor_heap->private_store); + return hr; + } vkd3d_mutex_init(&descriptor_heap->vk_sets_mutex); d3d12_device_add_ref(descriptor_heap->device = device); @@ -4563,7 +4559,7 @@ struct d3d12_query_heap *unsafe_impl_from_ID3D12QueryHeap(ID3D12QueryHeap *iface { if (!iface) return NULL; - assert(iface->lpVtbl == &d3d12_query_heap_vtbl); + VKD3D_ASSERT(iface->lpVtbl == &d3d12_query_heap_vtbl); return impl_from_ID3D12QueryHeap(iface); } diff --git a/libs/vkd3d/libs/vkd3d/state.c b/libs/vkd3d/libs/vkd3d/state.c index 7197193523d..d9d200e4850 100644 --- a/libs/vkd3d/libs/vkd3d/state.c +++ b/libs/vkd3d/libs/vkd3d/state.c @@ -194,7 +194,7 @@ struct d3d12_root_signature *unsafe_impl_from_ID3D12RootSignature(ID3D12RootSign { if (!iface) return NULL; - assert(iface->lpVtbl == &d3d12_root_signature_vtbl); + VKD3D_ASSERT(iface->lpVtbl == &d3d12_root_signature_vtbl); return impl_from_ID3D12RootSignature(iface); } @@ -345,15 +345,93 @@ struct d3d12_root_signature_info unsigned int sampler_unbounded_range_count; size_t cost; + + struct d3d12_root_signature_info_range + { + enum vkd3d_shader_descriptor_type type; + unsigned int space; + unsigned int base_idx; + unsigned int count; + D3D12_SHADER_VISIBILITY visibility; + } *ranges; + size_t range_count, range_capacity; }; +static HRESULT d3d12_root_signature_info_add_range(struct d3d12_root_signature_info *info, + enum vkd3d_shader_descriptor_type type, D3D12_SHADER_VISIBILITY visibility, + unsigned int space, unsigned int base_idx, unsigned int count) +{ + struct d3d12_root_signature_info_range *range; + + if (!vkd3d_array_reserve((void **)&info->ranges, &info->range_capacity, info->range_count + 1, + 
sizeof(*info->ranges))) + return E_OUTOFMEMORY; + + range = &info->ranges[info->range_count++]; + range->type = type; + range->space = space; + range->base_idx = base_idx; + range->count = count; + range->visibility = visibility; + + return S_OK; +} + +static int d3d12_root_signature_info_range_compare(const void *a, const void *b) +{ + const struct d3d12_root_signature_info_range *range_a = a, *range_b = b; + int ret; + + if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) + return ret; + + if ((ret = vkd3d_u32_compare(range_a->space, range_b->space))) + return ret; + + return vkd3d_u32_compare(range_a->base_idx, range_b->base_idx); +} + +static HRESULT d3d12_root_signature_info_range_validate(const struct d3d12_root_signature_info_range *ranges, + unsigned int count, D3D12_SHADER_VISIBILITY visibility) +{ + const struct d3d12_root_signature_info_range *range, *next; + unsigned int i = 0, j; + + while (i < count) + { + range = &ranges[i]; + + for (j = i + 1; j < count; ++j) + { + next = &ranges[j]; + + if (range->visibility != D3D12_SHADER_VISIBILITY_ALL + && next->visibility != D3D12_SHADER_VISIBILITY_ALL + && range->visibility != next->visibility) + continue; + + if (range->type == next->type && range->space == next->space + && range->base_idx + range->count > next->base_idx) + return E_INVALIDARG; + + break; + } + + i = j; + } + + return S_OK; +} + static HRESULT d3d12_root_signature_info_count_descriptors(struct d3d12_root_signature_info *info, - const D3D12_ROOT_DESCRIPTOR_TABLE *table, bool use_array) + const D3D12_ROOT_PARAMETER *param, bool use_array) { bool cbv_unbounded_range = false, srv_unbounded_range = false, uav_unbounded_range = false; + const D3D12_ROOT_DESCRIPTOR_TABLE *table = ¶m->u.DescriptorTable; bool sampler_unbounded_range = false; bool unbounded = false; unsigned int i, count; + HRESULT hr; for (i = 0; i < table->NumDescriptorRanges; ++i) { @@ -381,6 +459,12 @@ static HRESULT d3d12_root_signature_info_count_descriptors(struct 
d3d12_root_sig } count = range->NumDescriptors; + + if (FAILED(hr = d3d12_root_signature_info_add_range(info, + vkd3d_descriptor_type_from_d3d12_range_type(range->RangeType), + param->ShaderVisibility, range->RegisterSpace, range->BaseShaderRegister, count))) + return hr; + if (range->NumDescriptors == UINT_MAX) { unbounded = true; @@ -453,8 +537,8 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i { case D3D12_ROOT_PARAMETER_TYPE_DESCRIPTOR_TABLE: if (FAILED(hr = d3d12_root_signature_info_count_descriptors(info, - &p->u.DescriptorTable, use_array))) - return hr; + p, use_array))) + goto done; ++info->cost; break; @@ -463,35 +547,80 @@ static HRESULT d3d12_root_signature_info_from_desc(struct d3d12_root_signature_i ++info->cbv_count; ++info->binding_count; info->cost += 2; + if (FAILED(hr = d3d12_root_signature_info_add_range(info, + VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility, + p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) + goto done; break; + case D3D12_ROOT_PARAMETER_TYPE_SRV: ++info->root_descriptor_count; ++info->srv_count; ++info->binding_count; info->cost += 2; + if (FAILED(hr = d3d12_root_signature_info_add_range(info, + VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, p->ShaderVisibility, + p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) + goto done; break; + case D3D12_ROOT_PARAMETER_TYPE_UAV: ++info->root_descriptor_count; ++info->uav_count; ++info->binding_count; info->cost += 2; + if (FAILED(hr = d3d12_root_signature_info_add_range(info, + VKD3D_SHADER_DESCRIPTOR_TYPE_UAV, p->ShaderVisibility, + p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1))) + goto done; break; case D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS: ++info->root_constant_count; info->cost += p->u.Constants.Num32BitValues; + if (FAILED(hr = d3d12_root_signature_info_add_range(info, + VKD3D_SHADER_DESCRIPTOR_TYPE_CBV, p->ShaderVisibility, + p->u.Constants.RegisterSpace, p->u.Constants.ShaderRegister, 
1))) + goto done; break; default: FIXME("Unhandled type %#x for parameter %u.\n", p->ParameterType, i); - return E_NOTIMPL; + hr = E_NOTIMPL; + goto done; } } info->binding_count += desc->NumStaticSamplers; info->sampler_count += desc->NumStaticSamplers; - return S_OK; + for (i = 0; i < desc->NumStaticSamplers; ++i) + { + const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; + + if (FAILED(hr = d3d12_root_signature_info_add_range(info, + VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->ShaderVisibility, + s->RegisterSpace, s->ShaderRegister, 1))) + goto done; + } + + qsort(info->ranges, info->range_count, sizeof(*info->ranges), + d3d12_root_signature_info_range_compare); + + for (i = D3D12_SHADER_VISIBILITY_VERTEX; i <= D3D12_SHADER_VISIBILITY_MESH; ++i) + { + if (FAILED(hr = d3d12_root_signature_info_range_validate(info->ranges, info->range_count, i))) + goto done; + } + + hr = S_OK; +done: + vkd3d_free(info->ranges); + info->ranges = NULL; + info->range_count = 0; + info->range_capacity = 0; + + return hr; } static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signature *root_signature, @@ -509,14 +638,18 @@ static HRESULT d3d12_root_signature_init_push_constants(struct d3d12_root_signat for (i = 0; i < desc->NumParameters; ++i) { const D3D12_ROOT_PARAMETER *p = &desc->pParameters[i]; + D3D12_SHADER_VISIBILITY visibility; + if (p->ParameterType != D3D12_ROOT_PARAMETER_TYPE_32BIT_CONSTANTS) continue; - assert(p->ShaderVisibility <= D3D12_SHADER_VISIBILITY_PIXEL); - push_constants[p->ShaderVisibility].stageFlags = use_vk_heaps ? VK_SHADER_STAGE_ALL - : stage_flags_from_visibility(p->ShaderVisibility); - push_constants[p->ShaderVisibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t); + visibility = use_vk_heaps ? 
D3D12_SHADER_VISIBILITY_ALL : p->ShaderVisibility; + VKD3D_ASSERT(visibility <= D3D12_SHADER_VISIBILITY_PIXEL); + + push_constants[visibility].stageFlags = stage_flags_from_visibility(visibility); + push_constants[visibility].size += align(p->u.Constants.Num32BitValues, 4) * sizeof(uint32_t); } + if (push_constants[D3D12_SHADER_VISIBILITY_ALL].size) { /* When D3D12_SHADER_VISIBILITY_ALL is used we use a single push @@ -645,7 +778,7 @@ static HRESULT d3d12_root_signature_append_descriptor_set_layout(struct d3d12_ro return S_OK; } -static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, +static HRESULT d3d12_root_signature_append_vk_binding(struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int register_idx, bool buffer_descriptor, enum vkd3d_shader_visibility shader_visibility, unsigned int descriptor_count, struct vkd3d_descriptor_set_context *context) @@ -670,33 +803,38 @@ static void d3d12_root_signature_append_vk_binding(struct d3d12_root_signature * } if (context->unbounded_offset != UINT_MAX) - d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); + return d3d12_root_signature_append_descriptor_set_layout(root_signature, context, 0); + + return S_OK; } -static uint32_t d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, +static HRESULT d3d12_root_signature_assign_vk_bindings(struct d3d12_root_signature *root_signature, enum vkd3d_shader_descriptor_type descriptor_type, unsigned int register_space, unsigned int base_register_idx, unsigned int binding_count, bool is_buffer_descriptor, bool duplicate_descriptors, - enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context) + enum vkd3d_shader_visibility shader_visibility, struct vkd3d_descriptor_set_context *context, + uint32_t *first_binding) { - uint32_t first_binding; unsigned int i; + HRESULT hr; 
is_buffer_descriptor |= descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; duplicate_descriptors = (descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_SRV || descriptor_type == VKD3D_SHADER_DESCRIPTOR_TYPE_UAV) && duplicate_descriptors; - first_binding = context->descriptor_binding; + *first_binding = context->descriptor_binding; for (i = 0; i < binding_count; ++i) { - if (duplicate_descriptors) - d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, - base_register_idx + i, true, shader_visibility, 1, context); + if (duplicate_descriptors + && FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, + register_space, base_register_idx + i, true, shader_visibility, 1, context))) + return hr; - d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, - base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context); + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, register_space, + base_register_idx + i, is_buffer_descriptor, shader_visibility, 1, context))) + return hr; } - return first_binding; + return S_OK; } static uint32_t vkd3d_descriptor_magic_from_d3d12(D3D12_DESCRIPTOR_RANGE_TYPE type) @@ -764,6 +902,7 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r enum vkd3d_shader_visibility shader_visibility = vkd3d_shader_visibility_from_d3d12(visibility); bool is_buffer = range->type == VKD3D_SHADER_DESCRIPTOR_TYPE_CBV; enum vkd3d_shader_descriptor_type descriptor_type = range->type; + HRESULT hr; if (range->descriptor_count == UINT_MAX) context->unbounded_offset = range->offset; @@ -775,8 +914,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r return E_NOTIMPL; ++context->current_binding; - d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, - range->base_register_idx, true, shader_visibility, 
range->vk_binding_count, context); + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, + range->base_register_idx, true, shader_visibility, range->vk_binding_count, context))) + return hr; } if (!vk_binding_from_d3d12_descriptor_range(context->current_binding, @@ -784,8 +924,9 @@ static HRESULT d3d12_root_signature_init_descriptor_array_binding(struct d3d12_r return E_NOTIMPL; ++context->current_binding; - d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, - range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context); + if (FAILED(hr = d3d12_root_signature_append_vk_binding(root_signature, descriptor_type, range->register_space, + range->base_register_idx, is_buffer, shader_visibility, range->vk_binding_count, context))) + return hr; context->unbounded_offset = UINT_MAX; @@ -955,20 +1096,6 @@ static void d3d12_root_signature_map_descriptor_unbounded_binding(struct d3d12_r descriptor_offset, is_buffer, shader_visibility, context); } -static int compare_register_range(const void *a, const void *b) -{ - const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; - int ret; - - if ((ret = vkd3d_u32_compare(range_a->type, range_b->type))) - return ret; - - if ((ret = vkd3d_u32_compare(range_a->register_space, range_b->register_space))) - return ret; - - return vkd3d_u32_compare(range_a->base_register_idx, range_b->base_register_idx); -} - static int compare_descriptor_range(const void *a, const void *b) { const struct d3d12_root_descriptor_table_range *range_a = a, *range_b = b; @@ -983,25 +1110,6 @@ static int compare_descriptor_range(const void *a, const void *b) return (range_a->descriptor_count == UINT_MAX) - (range_b->descriptor_count == UINT_MAX); } -static HRESULT validate_descriptor_register_ranges(const struct d3d12_root_descriptor_table_range *ranges, - unsigned int count) -{ - const struct 
d3d12_root_descriptor_table_range *range, *prev; - unsigned int i; - - for (i = 1; i < count; ++i) - { - range = &ranges[i]; - prev = &ranges[i - 1]; - - if (range->type == prev->type && range->register_space == prev->register_space - && range->base_register_idx - prev->base_register_idx < prev->descriptor_count) - return E_INVALIDARG; - } - - return S_OK; -} - static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_root_signature *root_signature, const D3D12_ROOT_SIGNATURE_DESC *desc, const struct d3d12_root_signature_info *info, struct vkd3d_descriptor_set_context *context) @@ -1062,10 +1170,6 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo offset += range->NumDescriptors; } - qsort(table->ranges, range_count, sizeof(*table->ranges), compare_register_range); - if (FAILED(hr = validate_descriptor_register_ranges(table->ranges, range_count))) - return hr; - qsort(table->ranges, range_count, sizeof(*table->ranges), compare_descriptor_range); for (j = 0; j < range_count; ++j) @@ -1130,9 +1234,10 @@ static HRESULT d3d12_root_signature_init_root_descriptor_tables(struct d3d12_roo cur_binding = context->current_binding; - vk_binding = d3d12_root_signature_assign_vk_bindings(root_signature, + if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, range->type, range->register_space, range->base_register_idx, range->descriptor_count, false, true, - shader_visibility, context); + shader_visibility, context, &vk_binding))) + return hr; /* Unroll descriptor range. 
*/ for (k = 0; k < range->descriptor_count; ++k) @@ -1175,6 +1280,7 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign { VkDescriptorSetLayoutBinding *cur_binding = context->current_binding; unsigned int i; + HRESULT hr; root_signature->push_descriptor_mask = 0; @@ -1188,10 +1294,11 @@ static HRESULT d3d12_root_signature_init_root_descriptors(struct d3d12_root_sign root_signature->push_descriptor_mask |= 1u << i; - cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, + if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, vkd3d_descriptor_type_from_d3d12_root_parameter_type(p->ParameterType), p->u.Descriptor.RegisterSpace, p->u.Descriptor.ShaderRegister, 1, true, false, - vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context); + vkd3d_shader_visibility_from_d3d12(p->ShaderVisibility), context, &cur_binding->binding))) + return hr; cur_binding->descriptorType = vk_descriptor_type_from_d3d12_root_parameter(p->ParameterType); cur_binding->descriptorCount = 1; cur_binding->stageFlags = stage_flags_from_visibility(p->ShaderVisibility); @@ -1215,7 +1322,7 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa unsigned int i; HRESULT hr; - assert(root_signature->static_sampler_count == desc->NumStaticSamplers); + VKD3D_ASSERT(root_signature->static_sampler_count == desc->NumStaticSamplers); for (i = 0; i < desc->NumStaticSamplers; ++i) { const D3D12_STATIC_SAMPLER_DESC *s = &desc->pStaticSamplers[i]; @@ -1223,9 +1330,10 @@ static HRESULT d3d12_root_signature_init_static_samplers(struct d3d12_root_signa if (FAILED(hr = vkd3d_create_static_sampler(device, s, &root_signature->static_samplers[i]))) return hr; - cur_binding->binding = d3d12_root_signature_assign_vk_bindings(root_signature, + if (FAILED(hr = d3d12_root_signature_assign_vk_bindings(root_signature, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, s->RegisterSpace, s->ShaderRegister, 1, false, false, - 
vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context); + vkd3d_shader_visibility_from_d3d12(s->ShaderVisibility), context, &cur_binding->binding))) + return hr; cur_binding->descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER; cur_binding->descriptorCount = 1; cur_binding->stageFlags = stage_flags_from_visibility(s->ShaderVisibility); @@ -1600,7 +1708,7 @@ static HRESULT vkd3d_render_pass_cache_create_pass_locked(struct vkd3d_render_pa have_depth_stencil = key->depth_enable || key->stencil_enable; rt_count = have_depth_stencil ? key->attachment_count - 1 : key->attachment_count; - assert(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); + VKD3D_ASSERT(rt_count <= D3D12_SIMULTANEOUS_RENDER_TARGET_COUNT); for (index = 0, attachment_index = 0; index < rt_count; ++index) { @@ -2140,7 +2248,7 @@ struct d3d12_pipeline_state *unsafe_impl_from_ID3D12PipelineState(ID3D12Pipeline { if (!iface) return NULL; - assert(iface->lpVtbl == &d3d12_pipeline_state_vtbl); + VKD3D_ASSERT(iface->lpVtbl == &d3d12_pipeline_state_vtbl); return impl_from_ID3D12PipelineState(iface); } @@ -2296,7 +2404,7 @@ static HRESULT d3d12_pipeline_state_init_uav_counters(struct d3d12_pipeline_stat unsigned int i, j; HRESULT hr; - assert(vkd3d_popcount(stage_flags) == 1); + VKD3D_ASSERT(vkd3d_popcount(stage_flags) == 1); for (i = 0; i < shader_info->descriptor_count; ++i) { @@ -2911,7 +3019,7 @@ static HRESULT d3d12_graphics_pipeline_state_create_render_pass( if (dsv_format) { - assert(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask); + VKD3D_ASSERT(graphics->ds_desc.front.writeMask == graphics->ds_desc.back.writeMask); key.depth_enable = graphics->ds_desc.depthTestEnable; key.stencil_enable = graphics->ds_desc.stencilTestEnable; key.depth_stencil_write = graphics->ds_desc.depthWriteEnable @@ -2928,7 +3036,7 @@ static HRESULT d3d12_graphics_pipeline_state_create_render_pass( if (key.attachment_count != ARRAY_SIZE(key.vk_formats)) key.vk_formats[ARRAY_SIZE(key.vk_formats) - 1] = 
VK_FORMAT_UNDEFINED; for (i = key.attachment_count; i < ARRAY_SIZE(key.vk_formats); ++i) - assert(key.vk_formats[i] == VK_FORMAT_UNDEFINED); + VKD3D_ASSERT(key.vk_formats[i] == VK_FORMAT_UNDEFINED); key.padding = 0; key.sample_count = graphics->ms_desc.rasterizationSamples; @@ -3476,7 +3584,7 @@ static HRESULT d3d12_pipeline_state_init_graphics(struct d3d12_pipeline_state *s graphics->ms_desc.pSampleMask = NULL; if (desc->sample_mask != ~0u) { - assert(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask)); + VKD3D_ASSERT(DIV_ROUND_UP(sample_count, 32) <= ARRAY_SIZE(graphics->sample_mask)); graphics->sample_mask[0] = desc->sample_mask; graphics->sample_mask[1] = 0xffffffffu; graphics->ms_desc.pSampleMask = graphics->sample_mask; @@ -3769,7 +3877,7 @@ VkPipeline d3d12_pipeline_state_get_or_create_pipeline(struct d3d12_pipeline_sta .pDynamicStates = dynamic_states, }; - assert(d3d12_pipeline_state_is_graphics(state)); + VKD3D_ASSERT(d3d12_pipeline_state_is_graphics(state)); memset(&pipeline_key, 0, sizeof(pipeline_key)); pipeline_key.topology = topology; diff --git a/libs/vkd3d/libs/vkd3d/utils.c b/libs/vkd3d/libs/vkd3d/utils.c index 11029c9f5f9..831dc07af56 100644 --- a/libs/vkd3d/libs/vkd3d/utils.c +++ b/libs/vkd3d/libs/vkd3d/utils.c @@ -331,7 +331,7 @@ static HRESULT vkd3d_init_format_compatibility_lists(struct d3d12_device *device if (j >= current_list->format_count) { - assert(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT); + VKD3D_ASSERT(current_list->format_count < VKD3D_MAX_COMPATIBLE_FORMAT_COUNT); current_list->vk_formats[current_list->format_count++] = vk_format; } } @@ -427,7 +427,7 @@ static const struct vkd3d_format *vkd3d_get_depth_stencil_format(const struct d3 const struct vkd3d_format *formats; unsigned int i; - assert(device); + VKD3D_ASSERT(device); formats = device->depth_stencil_formats; for (i = 0; i < ARRAY_SIZE(vkd3d_depth_stencil_formats); ++i) diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c 
b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index c7431bd821b..9eccec111c7 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -38,12 +38,12 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, } if (!create_info->instance && !create_info->instance_create_info) { - ERR("Instance or instance create info is required.\n"); + WARN("Instance or instance create info is required.\n"); return E_INVALIDARG; } if (create_info->instance && create_info->instance_create_info) { - ERR("Instance and instance create info are mutually exclusive parameters.\n"); + WARN("Instance and instance create info are mutually exclusive parameters.\n"); return E_INVALIDARG; } @@ -153,7 +153,7 @@ static const D3D12_ROOT_SIGNATURE_DESC * STDMETHODCALLTYPE d3d12_root_signature_ TRACE("iface %p.\n", iface); - assert(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0); + VKD3D_ASSERT(deserializer->desc.d3d12.Version == D3D_ROOT_SIGNATURE_VERSION_1_0); return &deserializer->desc.d3d12.u.Desc_1_0; } @@ -354,7 +354,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_versioned_root_signature_deserializer_Get } } - assert(deserializer->other_desc.d3d12.Version == version); + VKD3D_ASSERT(deserializer->other_desc.d3d12.Version == version); *desc = &deserializer->other_desc.d3d12; return S_OK; } diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index d1fa866d9e3..ba4e2e8488d 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -37,7 +37,6 @@ #include "vkd3d.h" #include "vkd3d_shader.h" -#include <assert.h> #include <inttypes.h> #include <limits.h> #include <stdbool.h> @@ -123,6 +122,7 @@ struct vkd3d_vulkan_info bool KHR_image_format_list; bool KHR_maintenance2; bool KHR_maintenance3; + bool KHR_portability_subset; bool KHR_push_descriptor; bool KHR_sampler_mirror_clamp_to_edge; bool KHR_timeline_semaphore; @@ -145,6 +145,8 @@ struct vkd3d_vulkan_info bool rasterization_stream; bool
transform_feedback_queries; + bool geometry_shaders; + bool tessellation_shaders; bool uav_read_without_format; @@ -676,7 +678,7 @@ static inline void *d3d12_desc_get_object_ref(const volatile struct d3d12_desc * void *view; /* Some games, e.g. Shadow of the Tomb Raider, GRID 2019, and Horizon Zero Dawn, write descriptors - * from multiple threads without syncronisation. This is apparently valid in Windows. */ + * from multiple threads without synchronisation. This is apparently valid in Windows. */ for (;;) { do @@ -784,8 +786,8 @@ extern const enum vkd3d_vk_descriptor_set_index vk_descriptor_set_index_table[]; static inline enum vkd3d_vk_descriptor_set_index vkd3d_vk_descriptor_set_index_from_vk_descriptor_type( VkDescriptorType type) { - assert(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); - assert(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); + VKD3D_ASSERT(type <= VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER); + VKD3D_ASSERT(vk_descriptor_set_index_table[type] < VKD3D_SET_INDEX_COUNT); return vk_descriptor_set_index_table[type]; } @@ -1229,7 +1231,7 @@ enum vkd3d_pipeline_bind_point /* ID3D12CommandList */ struct d3d12_command_list { - ID3D12GraphicsCommandList5 ID3D12GraphicsCommandList5_iface; + ID3D12GraphicsCommandList6 ID3D12GraphicsCommandList6_iface; unsigned int refcount; D3D12_COMMAND_LIST_TYPE type; @@ -1753,7 +1755,6 @@ static inline void vk_prepend_struct(void *header, void *structure) { VkBaseOutStructure *vk_header = header, *vk_structure = structure; - assert(!vk_structure->pNext); vk_structure->pNext = vk_header->pNext; vk_header->pNext = vk_structure; } @@ -1766,7 +1767,7 @@ static inline void vkd3d_prepend_struct(void *header, void *structure) const void *next; } *vkd3d_header = header, *vkd3d_structure = structure; - assert(!vkd3d_structure->next); + VKD3D_ASSERT(!vkd3d_structure->next); vkd3d_structure->next = vkd3d_header->next; vkd3d_header->next = vkd3d_structure; } -- 2.45.2