From 70a24464222d56067cc610d54f66d438051e81a7 Mon Sep 17 00:00:00 2001 From: Alistair Leslie-Hughes Date: Thu, 7 Mar 2024 10:40:41 +1100 Subject: [PATCH] Updated vkd3d to 166dc24b2f73b0541a14815081ee4c8d9eab3269. --- libs/vkd3d/include/private/vkd3d_common.h | 181 +- libs/vkd3d/include/private/vkd3d_memory.h | 2 +- libs/vkd3d/include/vkd3d_shader.h | 36 +- libs/vkd3d/include/vkd3d_types.h | 2 + libs/vkd3d/libs/vkd3d-common/blob.c | 3 +- libs/vkd3d/libs/vkd3d-common/debug.c | 2 +- libs/vkd3d/libs/vkd3d-common/error.c | 1 - libs/vkd3d/libs/vkd3d-shader/d3d_asm.c | 471 +++-- libs/vkd3d/libs/vkd3d-shader/d3dbc.c | 54 +- libs/vkd3d/libs/vkd3d-shader/dxbc.c | 39 +- libs/vkd3d/libs/vkd3d-shader/dxil.c | 499 ++++- libs/vkd3d/libs/vkd3d-shader/fx.c | 361 +++- libs/vkd3d/libs/vkd3d-shader/glsl.c | 88 +- libs/vkd3d/libs/vkd3d-shader/hlsl.c | 28 +- libs/vkd3d/libs/vkd3d-shader/hlsl.h | 11 +- libs/vkd3d/libs/vkd3d-shader/hlsl.l | 6 +- libs/vkd3d/libs/vkd3d-shader/hlsl.y | 128 +- libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 346 +++- libs/vkd3d/libs/vkd3d-shader/ir.c | 1653 ++++++++++++++--- libs/vkd3d/libs/vkd3d-shader/spirv.c | 299 ++- libs/vkd3d/libs/vkd3d-shader/tpf.c | 49 +- .../libs/vkd3d-shader/vkd3d_shader_main.c | 273 ++- .../libs/vkd3d-shader/vkd3d_shader_private.h | 67 +- libs/vkd3d/libs/vkd3d/device.c | 365 ++-- libs/vkd3d/libs/vkd3d/resource.c | 9 +- libs/vkd3d/libs/vkd3d/vkd3d_main.c | 4 +- libs/vkd3d/libs/vkd3d/vkd3d_private.h | 82 +- 27 files changed, 3972 insertions(+), 1087 deletions(-) diff --git a/libs/vkd3d/include/private/vkd3d_common.h b/libs/vkd3d/include/private/vkd3d_common.h index 6a3b530d868..974ff9446db 100644 --- a/libs/vkd3d/include/private/vkd3d_common.h +++ b/libs/vkd3d/include/private/vkd3d_common.h @@ -30,6 +30,9 @@ #include #include #include +#ifndef _WIN32 +#include +#endif #ifdef _MSC_VER #include @@ -105,11 +108,130 @@ VKD3D_NORETURN static inline void vkd3d_unreachable_(const char *filename, unsig #define vkd3d_unreachable() 
vkd3d_unreachable_(__FILE__, __LINE__) #endif +#ifdef VKD3D_NO_TRACE_MESSAGES +#define TRACE(args...) do { } while (0) +#define TRACE_ON() (false) +#endif + +#ifdef VKD3D_NO_DEBUG_MESSAGES +#define WARN(args...) do { } while (0) +#define FIXME(args...) do { } while (0) +#endif + +enum vkd3d_dbg_level +{ + VKD3D_DBG_LEVEL_NONE, + VKD3D_DBG_LEVEL_ERR, + VKD3D_DBG_LEVEL_FIXME, + VKD3D_DBG_LEVEL_WARN, + VKD3D_DBG_LEVEL_TRACE, +}; + +enum vkd3d_dbg_level vkd3d_dbg_get_level(void); + +void vkd3d_dbg_printf(enum vkd3d_dbg_level level, const char *function, const char *fmt, ...) VKD3D_PRINTF_FUNC(3, 4); +void vkd3d_dbg_set_log_callback(PFN_vkd3d_log callback); + +const char *vkd3d_dbg_sprintf(const char *fmt, ...) VKD3D_PRINTF_FUNC(1, 2); +const char *vkd3d_dbg_vsprintf(const char *fmt, va_list args); +const char *debugstr_a(const char *str); +const char *debugstr_an(const char *str, size_t n); +const char *debugstr_w(const WCHAR *wstr, size_t wchar_size); + +#define VKD3D_DBG_LOG(level) \ + do { \ + const enum vkd3d_dbg_level vkd3d_dbg_level = VKD3D_DBG_LEVEL_##level; \ + VKD3D_DBG_PRINTF + +#define VKD3D_DBG_LOG_ONCE(first_time_level, level) \ + do { \ + static bool vkd3d_dbg_next_time; \ + const enum vkd3d_dbg_level vkd3d_dbg_level = vkd3d_dbg_next_time \ + ? VKD3D_DBG_LEVEL_##level : VKD3D_DBG_LEVEL_##first_time_level; \ + vkd3d_dbg_next_time = true; \ + VKD3D_DBG_PRINTF + +#define VKD3D_DBG_PRINTF(...) 
\ + vkd3d_dbg_printf(vkd3d_dbg_level, __FUNCTION__, __VA_ARGS__); } while (0) + +#ifndef TRACE +#define TRACE VKD3D_DBG_LOG(TRACE) +#endif + +#ifndef WARN +#define WARN VKD3D_DBG_LOG(WARN) +#endif + +#ifndef FIXME +#define FIXME VKD3D_DBG_LOG(FIXME) +#endif + +#define ERR VKD3D_DBG_LOG(ERR) + +#ifndef TRACE_ON +#define TRACE_ON() (vkd3d_dbg_get_level() == VKD3D_DBG_LEVEL_TRACE) +#endif + +#ifndef WARN_ON +#define WARN_ON() (vkd3d_dbg_get_level() >= VKD3D_DBG_LEVEL_WARN) +#endif + +#define FIXME_ONCE VKD3D_DBG_LOG_ONCE(FIXME, WARN) + +#define VKD3D_DEBUG_ENV_NAME(name) const char *const vkd3d_dbg_env_name = name + +static inline const char *debugstr_guid(const GUID *guid) +{ + if (!guid) + return "(null)"; + + return vkd3d_dbg_sprintf("{%08lx-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x}", + (unsigned long)guid->Data1, guid->Data2, guid->Data3, guid->Data4[0], + guid->Data4[1], guid->Data4[2], guid->Data4[3], guid->Data4[4], + guid->Data4[5], guid->Data4[6], guid->Data4[7]); +} + +static inline const char *debugstr_hresult(HRESULT hr) +{ + switch (hr) + { +#define TO_STR(u) case u: return #u; + TO_STR(S_OK) + TO_STR(S_FALSE) + TO_STR(E_NOTIMPL) + TO_STR(E_NOINTERFACE) + TO_STR(E_POINTER) + TO_STR(E_ABORT) + TO_STR(E_FAIL) + TO_STR(E_OUTOFMEMORY) + TO_STR(E_INVALIDARG) + TO_STR(DXGI_ERROR_NOT_FOUND) + TO_STR(DXGI_ERROR_MORE_DATA) + TO_STR(DXGI_ERROR_UNSUPPORTED) +#undef TO_STR + default: + return vkd3d_dbg_sprintf("%#x", (int)hr); + } +} + +unsigned int vkd3d_env_var_as_uint(const char *name, unsigned int default_value); + +struct vkd3d_debug_option +{ + const char *name; + uint64_t flag; +}; + +bool vkd3d_debug_list_has_member(const char *string, const char *member); +uint64_t vkd3d_parse_debug_options(const char *string, + const struct vkd3d_debug_option *options, unsigned int option_count); +void vkd3d_set_thread_name(const char *name); + static inline unsigned int vkd3d_popcount(unsigned int v) { #ifdef _MSC_VER return __popcnt(v); -#elif defined(__MINGW32__) 
+#elif defined(HAVE_BUILTIN_POPCOUNT) return __builtin_popcount(v); #else v -= (v >> 1) & 0x55555555; @@ -305,6 +427,63 @@ static inline uint32_t vkd3d_atomic_increment_u32(uint32_t volatile *x) return vkd3d_atomic_add_fetch_u32(x, 1); } +struct vkd3d_mutex +{ +#ifdef _WIN32 + CRITICAL_SECTION lock; +#else + pthread_mutex_t lock; +#endif +}; + +static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + InitializeCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_init(&lock->lock, NULL))) + ERR("Failed to initialise the mutex, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + EnterCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_lock(&lock->lock))) + ERR("Failed to lock the mutex, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + LeaveCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_unlock(&lock->lock))) + ERR("Failed to unlock the mutex, ret %d.\n", ret); +#endif +} + +static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) +{ +#ifdef _WIN32 + DeleteCriticalSection(&lock->lock); +#else + int ret; + + if ((ret = pthread_mutex_destroy(&lock->lock))) + ERR("Failed to destroy the mutex, ret %d.\n", ret); +#endif +} + static inline void vkd3d_parse_version(const char *version, int *major, int *minor) { *major = atoi(version); diff --git a/libs/vkd3d/include/private/vkd3d_memory.h b/libs/vkd3d/include/private/vkd3d_memory.h index 8a2edb1000d..bb177e39add 100644 --- a/libs/vkd3d/include/private/vkd3d_memory.h +++ b/libs/vkd3d/include/private/vkd3d_memory.h @@ -24,7 +24,7 @@ #include #include -#include "vkd3d_debug.h" +#include "vkd3d_common.h" static inline void *vkd3d_malloc(size_t size) { diff --git a/libs/vkd3d/include/vkd3d_shader.h b/libs/vkd3d/include/vkd3d_shader.h index 9e663919c38..83b90474af4 100644 --- 
a/libs/vkd3d/include/vkd3d_shader.h +++ b/libs/vkd3d/include/vkd3d_shader.h @@ -148,6 +148,12 @@ enum vkd3d_shader_compile_option_formatting_flags VKD3D_SHADER_COMPILE_OPTION_FORMATTING_OFFSETS = 0x00000004, VKD3D_SHADER_COMPILE_OPTION_FORMATTING_HEADER = 0x00000008, VKD3D_SHADER_COMPILE_OPTION_FORMATTING_RAW_IDS = 0x00000010, + /** + * Emit the signatures when disassembling a shader. + * + * \since 1.12 + */ + VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES = 0x00000020, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FORMATTING_FLAGS), }; @@ -212,6 +218,20 @@ enum vkd3d_shader_compile_option_feature_flags VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_FEATURE_FLAGS), }; +/** + * Flags for vkd3d_shader_parse_dxbc(). + * + * \since 1.12 + */ +enum vkd3d_shader_parse_dxbc_flags +{ + /** Ignore the checksum and continue parsing even if it is + * incorrect. */ + VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM = 0x00000001, + + VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_PARSE_DXBC_FLAGS), +}; + enum vkd3d_shader_compile_option_name { /** @@ -279,6 +299,15 @@ enum vkd3d_shader_compile_option_name * \since 1.11 */ VKD3D_SHADER_COMPILE_OPTION_FEATURE = 0x0000000a, + /** + * If \a value is non-zero compilation will produce a child effect using + * shared object descriptions, as instructed by the "shared" modifier. + * Child effects are supported with fx_2_0, fx_4_0, and fx_4_1. This option + * and "shared" modifiers are ignored for fx_5_0 profile, and non-fx profiles. 
+ * + * \since 1.12 + */ + VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT = 0x0000000b, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_COMPILE_OPTION_NAME), }; @@ -886,6 +915,8 @@ enum vkd3d_shader_spirv_extension VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT, /** \since 1.11 */ VKD3D_SHADER_SPIRV_EXTENSION_EXT_VIEWPORT_INDEX_LAYER, + /** \since 1.12 */ + VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK, VKD3D_FORCE_32_BIT_ENUM(VKD3D_SHADER_SPIRV_EXTENSION), }; @@ -2377,9 +2408,8 @@ VKD3D_SHADER_API void vkd3d_shader_free_dxbc(struct vkd3d_shader_dxbc_desc *dxbc * * \param dxbc A vkd3d_shader_code structure containing the DXBC blob to parse. * - * \param flags A set of flags modifying the behaviour of the function. No - * flags are defined for this version of vkd3d-shader, and this parameter - * should be set to 0. + * \param flags A combination of zero or more elements of enum + * vkd3d_shader_parse_dxbc_flags. * * \param desc A vkd3d_shader_dxbc_desc structure describing the contents of * the DXBC blob. Its vkd3d_shader_dxbc_section_desc structures will contain diff --git a/libs/vkd3d/include/vkd3d_types.h b/libs/vkd3d/include/vkd3d_types.h index 12ceef42fc4..017eaf11806 100644 --- a/libs/vkd3d/include/vkd3d_types.h +++ b/libs/vkd3d/include/vkd3d_types.h @@ -41,6 +41,8 @@ enum vkd3d_result { /** Success. */ VKD3D_OK = 0, + /** Success as a result of there being nothing to do. */ + VKD3D_FALSE = 1, /** An unspecified failure occurred. */ VKD3D_ERROR = -1, /** There are not enough resources available to complete the operation. 
*/ diff --git a/libs/vkd3d/libs/vkd3d-common/blob.c b/libs/vkd3d/libs/vkd3d-common/blob.c index 06a12ef5bc4..6bc95dc55c4 100644 --- a/libs/vkd3d/libs/vkd3d-common/blob.c +++ b/libs/vkd3d/libs/vkd3d-common/blob.c @@ -17,11 +17,12 @@ */ #define COBJMACROS + #define CONST_VTABLE #include "vkd3d.h" #include "vkd3d_blob.h" -#include "vkd3d_debug.h" #include "vkd3d_memory.h" +#include "d3d12shader.h" struct vkd3d_blob { diff --git a/libs/vkd3d/libs/vkd3d-common/debug.c b/libs/vkd3d/libs/vkd3d-common/debug.c index e12cd39450a..4523fc997ef 100644 --- a/libs/vkd3d/libs/vkd3d-common/debug.c +++ b/libs/vkd3d/libs/vkd3d-common/debug.c @@ -20,7 +20,7 @@ # define _WIN32_WINNT 0x0600 /* For InitOnceExecuteOnce(). */ #endif -#include "vkd3d_debug.h" +#include "vkd3d_common.h" #include #include diff --git a/libs/vkd3d/libs/vkd3d-common/error.c b/libs/vkd3d/libs/vkd3d-common/error.c index 3572669ac1c..b8350a5404c 100644 --- a/libs/vkd3d/libs/vkd3d-common/error.c +++ b/libs/vkd3d/libs/vkd3d-common/error.c @@ -17,7 +17,6 @@ */ #include "vkd3d_common.h" -#include "vkd3d_debug.h" HRESULT hresult_from_vkd3d_result(int vkd3d_result) { diff --git a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c index 3f86bd45960..0623a129eae 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3d_asm.c @@ -250,6 +250,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_NOT ] = "not", [VKD3DSIH_NRM ] = "nrm", [VKD3DSIH_OR ] = "or", + [VKD3DSIH_ORD ] = "ord", [VKD3DSIH_PHASE ] = "phase", [VKD3DSIH_PHI ] = "phi", [VKD3DSIH_POW ] = "pow", @@ -321,6 +322,7 @@ static const char * const shader_opcode_names[] = [VKD3DSIH_UMAX ] = "umax", [VKD3DSIH_UMIN ] = "umin", [VKD3DSIH_UMUL ] = "umul", + [VKD3DSIH_UNO ] = "uno", [VKD3DSIH_USHR ] = "ushr", [VKD3DSIH_UTOD ] = "utod", [VKD3DSIH_UTOF ] = "utof", @@ -370,6 +372,7 @@ struct vkd3d_d3d_asm_colours const char *swizzle; const char *version; const char *write_mask; + const char *label; }; 
struct vkd3d_d3d_asm_compiler @@ -377,7 +380,7 @@ struct vkd3d_d3d_asm_compiler struct vkd3d_string_buffer buffer; struct vkd3d_shader_version shader_version; struct vkd3d_d3d_asm_colours colours; - enum vsir_asm_dialect dialect; + enum vsir_asm_flags flags; const struct vkd3d_shader_instruction *current; }; @@ -511,79 +514,88 @@ static void shader_dump_uav_flags(struct vkd3d_d3d_asm_compiler *compiler, uint3 vkd3d_string_buffer_printf(&compiler->buffer, "_unknown_flags(%#x)", uav_flags); } -static void shader_dump_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_tessellator_domain domain) +static void shader_print_tessellator_domain(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_tessellator_domain d, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *domain; - shader_addline(buffer, "domain_"); - switch (domain) + switch (d) { case VKD3D_TESSELLATOR_DOMAIN_LINE: - shader_addline(buffer, "isoline"); + domain = "domain_isoline"; break; case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE: - shader_addline(buffer, "tri"); + domain = "domain_tri"; break; case VKD3D_TESSELLATOR_DOMAIN_QUAD: - shader_addline(buffer, "quad"); + domain = "domain_quad"; break; default: - shader_addline(buffer, "unknown_tessellator_domain(%#x)", domain); - break; + vkd3d_string_buffer_printf(buffer, "%s%s<unhandled tessellator domain %#x>%s%s", + prefix, compiler->colours.error, d, compiler->colours.reset, suffix); + return; } + + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, domain, suffix); } -static void shader_dump_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_tessellator_output_primitive output_primitive) +static void shader_print_tessellator_output_primitive(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_tessellator_output_primitive p, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *primitive; - 
shader_addline(buffer, "output_"); - switch (output_primitive) + switch (p) { case VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT: - shader_addline(buffer, "point"); + primitive = "output_point"; break; case VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE: - shader_addline(buffer, "line"); + primitive = "output_line"; break; case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW: - shader_addline(buffer, "triangle_cw"); + primitive = "output_triangle_cw"; break; case VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW: - shader_addline(buffer, "triangle_ccw"); + primitive = "output_triangle_ccw"; break; default: - shader_addline(buffer, "unknown_tessellator_output_primitive(%#x)", output_primitive); - break; + vkd3d_string_buffer_printf(buffer, "%s%s<unhandled tessellator output primitive %#x>%s%s", + prefix, compiler->colours.error, p, compiler->colours.reset, suffix); + return; } + + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, primitive, suffix); } -static void shader_dump_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, - enum vkd3d_shader_tessellator_partitioning partitioning) +static void shader_print_tessellator_partitioning(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, enum vkd3d_shader_tessellator_partitioning p, const char *suffix) { struct vkd3d_string_buffer *buffer = &compiler->buffer; + const char *partitioning; - shader_addline(buffer, "partitioning_"); - switch (partitioning) + switch (p) { case VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER: - shader_addline(buffer, "integer"); + partitioning = "partitioning_integer"; break; case VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2: - shader_addline(buffer, "pow2"); + partitioning = "partitioning_pow2"; break; case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: - shader_addline(buffer, "fractional_odd"); + partitioning = "partitioning_fractional_odd"; break; case VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: - shader_addline(buffer, "fractional_even"); + partitioning = "partitioning_fractional_even"; break; default: - 
shader_addline(buffer, "unknown_tessellator_partitioning(%#x)", partitioning); - break; + vkd3d_string_buffer_printf(buffer, "%s%s<unhandled tessellator partitioning %#x>%s%s", + prefix, compiler->colours.error, p, compiler->colours.reset, suffix); + return; } + + vkd3d_string_buffer_printf(buffer, "%s%s%s", prefix, partitioning, suffix); } static void shader_dump_shader_input_sysval_semantic(struct vkd3d_d3d_asm_compiler *compiler, @@ -646,6 +658,8 @@ static void shader_dump_data_type(struct vkd3d_d3d_asm_compiler *compiler, enum [VKD3D_DATA_UINT8 ] = "uint8", [VKD3D_DATA_UINT64 ] = "uint64", [VKD3D_DATA_BOOL ] = "bool", + [VKD3D_DATA_UINT16 ] = "uint16", + [VKD3D_DATA_HALF ] = "half", }; const char *name; @@ -793,8 +807,8 @@ static void shader_dump_decl_usage(struct vkd3d_d3d_asm_compiler *compiler, } } -static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_src_param *param); +static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix); static void shader_print_float_literal(struct vkd3d_d3d_asm_compiler *compiler, const char *prefix, float f, const char *suffix) @@ -891,13 +905,9 @@ static void shader_print_untyped_literal(struct vkd3d_d3d_asm_compiler *compiler static void shader_print_subscript(struct vkd3d_d3d_asm_compiler *compiler, unsigned int offset, const struct vkd3d_shader_src_param *rel_addr) { - vkd3d_string_buffer_printf(&compiler->buffer, "["); if (rel_addr) - { - shader_dump_src_param(compiler, rel_addr); - vkd3d_string_buffer_printf(&compiler->buffer, " + "); - } - shader_print_uint_literal(compiler, "", offset, "]"); + shader_print_src_param(compiler, "[", rel_addr, " + "); + shader_print_uint_literal(compiler, rel_addr ? 
"" : "[", offset, "]"); } static void shader_print_subscript_range(struct vkd3d_d3d_asm_compiler *compiler, @@ -920,7 +930,7 @@ static void shader_dump_register(struct vkd3d_d3d_asm_compiler *compiler, const static const char * const rastout_reg_names[] = {"oPos", "oFog", "oPts"}; static const char * const misctype_reg_names[] = {"vPos", "vFace"}; - shader_addline(buffer, "%s", compiler->colours.reg); + shader_addline(buffer, "%s", reg->type == VKD3DSPR_LABEL ? compiler->colours.label : compiler->colours.reg); switch (reg->type) { case VKD3DSPR_TEMP: @@ -1370,7 +1380,10 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, struct vkd3d_string_buffer *buffer = &compiler->buffer; const char *dimension; - if (compiler->dialect != VSIR_ASM_VSIR) + if (!(compiler->flags & VSIR_ASM_FLAG_DUMP_TYPES)) + return; + + if (reg->data_type == VKD3D_DATA_UNUSED) return; if (reg->dimension < ARRAY_SIZE(dimensions)) @@ -1383,78 +1396,110 @@ static void shader_dump_reg_type(struct vkd3d_d3d_asm_compiler *compiler, shader_addline(buffer, ">"); } -static void shader_dump_dst_param(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_dst_param *param, bool is_declaration) +static void shader_print_write_mask(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, uint32_t mask, const char *suffix) +{ + unsigned int i = 0; + char buffer[5]; + + if (mask == 0) + { + vkd3d_string_buffer_printf(&compiler->buffer, "%s%s", prefix, suffix); + return; + } + + if (mask & VKD3DSP_WRITEMASK_0) + buffer[i++] = 'x'; + if (mask & VKD3DSP_WRITEMASK_1) + buffer[i++] = 'y'; + if (mask & VKD3DSP_WRITEMASK_2) + buffer[i++] = 'z'; + if (mask & VKD3DSP_WRITEMASK_3) + buffer[i++] = 'w'; + buffer[i++] = '\0'; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s.%s%s%s%s", prefix, + compiler->colours.write_mask, buffer, compiler->colours.reset, suffix); +} + +static void shader_print_dst_param(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const 
struct vkd3d_shader_dst_param *param, bool is_declaration, const char *suffix) { - struct vkd3d_string_buffer *buffer = &compiler->buffer; uint32_t write_mask = param->write_mask; + vkd3d_string_buffer_printf(&compiler->buffer, "%s", prefix); shader_dump_register(compiler, &param->reg, is_declaration); if (write_mask && param->reg.dimension == VSIR_DIMENSION_VEC4) { - static const char write_mask_chars[] = "xyzw"; - if (data_type_is_64_bit(param->reg.data_type)) write_mask = vsir_write_mask_32_from_64(write_mask); - shader_addline(buffer, ".%s", compiler->colours.write_mask); - if (write_mask & VKD3DSP_WRITEMASK_0) - shader_addline(buffer, "%c", write_mask_chars[0]); - if (write_mask & VKD3DSP_WRITEMASK_1) - shader_addline(buffer, "%c", write_mask_chars[1]); - if (write_mask & VKD3DSP_WRITEMASK_2) - shader_addline(buffer, "%c", write_mask_chars[2]); - if (write_mask & VKD3DSP_WRITEMASK_3) - shader_addline(buffer, "%c", write_mask_chars[3]); - shader_addline(buffer, "%s", compiler->colours.reset); + shader_print_write_mask(compiler, "", write_mask, ""); } shader_print_precision(compiler, &param->reg); shader_print_non_uniform(compiler, &param->reg); shader_dump_reg_type(compiler, &param->reg); + vkd3d_string_buffer_printf(&compiler->buffer, "%s", suffix); } -static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, - const struct vkd3d_shader_src_param *param) +static void shader_print_src_param(struct vkd3d_d3d_asm_compiler *compiler, + const char *prefix, const struct vkd3d_shader_src_param *param, const char *suffix) { enum vkd3d_shader_src_modifier src_modifier = param->modifiers; struct vkd3d_string_buffer *buffer = &compiler->buffer; uint32_t swizzle = param->swizzle; + const char *modifier = ""; if (src_modifier == VKD3DSPSM_NEG || src_modifier == VKD3DSPSM_BIASNEG || src_modifier == VKD3DSPSM_SIGNNEG || src_modifier == VKD3DSPSM_X2NEG || src_modifier == VKD3DSPSM_ABSNEG) - shader_addline(buffer, "-"); + modifier = "-"; else if (src_modifier == 
VKD3DSPSM_COMP) - shader_addline(buffer, "1-"); + modifier = "1-"; else if (src_modifier == VKD3DSPSM_NOT) - shader_addline(buffer, "!"); + modifier = "!"; + vkd3d_string_buffer_printf(buffer, "%s%s", prefix, modifier); if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) - shader_addline(buffer, "|"); + vkd3d_string_buffer_printf(buffer, "|"); shader_dump_register(compiler, &param->reg, false); switch (src_modifier) { - case VKD3DSPSM_NONE: break; - case VKD3DSPSM_NEG: break; - case VKD3DSPSM_NOT: break; - case VKD3DSPSM_BIAS: shader_addline(buffer, "_bias"); break; - case VKD3DSPSM_BIASNEG: shader_addline(buffer, "_bias"); break; - case VKD3DSPSM_SIGN: shader_addline(buffer, "_bx2"); break; - case VKD3DSPSM_SIGNNEG: shader_addline(buffer, "_bx2"); break; - case VKD3DSPSM_COMP: break; - case VKD3DSPSM_X2: shader_addline(buffer, "_x2"); break; - case VKD3DSPSM_X2NEG: shader_addline(buffer, "_x2"); break; - case VKD3DSPSM_DZ: shader_addline(buffer, "_dz"); break; - case VKD3DSPSM_DW: shader_addline(buffer, "_dw"); break; + case VKD3DSPSM_NONE: + case VKD3DSPSM_NEG: + case VKD3DSPSM_COMP: + case VKD3DSPSM_ABS: case VKD3DSPSM_ABSNEG: - case VKD3DSPSM_ABS: /* handled later */ break; - default: shader_addline(buffer, "_unknown_modifier(%#x)", src_modifier); + case VKD3DSPSM_NOT: + break; + case VKD3DSPSM_BIAS: + case VKD3DSPSM_BIASNEG: + vkd3d_string_buffer_printf(buffer, "_bias"); + break; + case VKD3DSPSM_SIGN: + case VKD3DSPSM_SIGNNEG: + vkd3d_string_buffer_printf(buffer, "_bx2"); + break; + case VKD3DSPSM_X2: + case VKD3DSPSM_X2NEG: + vkd3d_string_buffer_printf(buffer, "_x2"); + break; + case VKD3DSPSM_DZ: + vkd3d_string_buffer_printf(buffer, "_dz"); + break; + case VKD3DSPSM_DW: + vkd3d_string_buffer_printf(buffer, "_dw"); + break; + default: + vkd3d_string_buffer_printf(buffer, "_%s<unhandled modifier %#x>%s", + compiler->colours.error, src_modifier, compiler->colours.reset); + break; } if (param->reg.type != VKD3DSPR_IMMCONST && param->reg.type != VKD3DSPR_IMMCONST64 @@ 
-1472,26 +1517,22 @@ static void shader_dump_src_param(struct vkd3d_d3d_asm_compiler *compiler, swizzle_z = vsir_swizzle_get_component(swizzle, 2); swizzle_w = vsir_swizzle_get_component(swizzle, 3); - if (swizzle_x == swizzle_y - && swizzle_x == swizzle_z - && swizzle_x == swizzle_w) - { - shader_addline(buffer, ".%s%c%s", compiler->colours.swizzle, + if (swizzle_x == swizzle_y && swizzle_x == swizzle_z && swizzle_x == swizzle_w) + vkd3d_string_buffer_printf(buffer, ".%s%c%s", compiler->colours.swizzle, swizzle_chars[swizzle_x], compiler->colours.reset); - } else - { - shader_addline(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, + vkd3d_string_buffer_printf(buffer, ".%s%c%c%c%c%s", compiler->colours.swizzle, swizzle_chars[swizzle_x], swizzle_chars[swizzle_y], swizzle_chars[swizzle_z], swizzle_chars[swizzle_w], compiler->colours.reset); - } } + if (src_modifier == VKD3DSPSM_ABS || src_modifier == VKD3DSPSM_ABSNEG) - shader_addline(buffer, "|"); + vkd3d_string_buffer_printf(buffer, "|"); shader_print_precision(compiler, &param->reg); shader_print_non_uniform(compiler, &param->reg); shader_dump_reg_type(compiler, &param->reg); + vkd3d_string_buffer_printf(buffer, "%s", suffix); } static void shader_dump_ins_modifiers(struct vkd3d_d3d_asm_compiler *compiler, @@ -1765,11 +1806,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, compiler->current = ins; if (ins->predicate) - { - vkd3d_string_buffer_printf(buffer, "("); - shader_dump_src_param(compiler, ins->predicate); - vkd3d_string_buffer_printf(buffer, ") "); - } + shader_print_src_param(compiler, "(", ins->predicate, ") "); /* PixWin marks instructions with the coissue flag with a '+' */ if (ins->coissue) @@ -1823,8 +1860,7 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_INDEX_RANGE: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.index_range.dst, true); + shader_print_dst_param(compiler, " 
", &ins->declaration.index_range.dst, true, ""); shader_print_uint_literal(compiler, " ", ins->declaration.index_range.register_count, ""); break; @@ -1842,16 +1878,14 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL_INPUT_PS: vkd3d_string_buffer_printf(buffer, " "); shader_dump_interpolation_mode(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.dst, true); + shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); break; case VKD3DSIH_DCL_INPUT_PS_SGV: case VKD3DSIH_DCL_INPUT_SGV: case VKD3DSIH_DCL_INPUT_SIV: case VKD3DSIH_DCL_OUTPUT_SIV: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); shader_addline(buffer, ", "); shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); break; @@ -1859,16 +1893,14 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL_INPUT_PS_SIV: vkd3d_string_buffer_printf(buffer, " "); shader_dump_interpolation_mode(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.register_semantic.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.register_semantic.reg, true, ""); shader_addline(buffer, ", "); shader_dump_shader_input_sysval_semantic(compiler, ins->declaration.register_semantic.sysval_semantic); break; case VKD3DSIH_DCL_INPUT: case VKD3DSIH_DCL_OUTPUT: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.dst, true); + shader_print_dst_param(compiler, " ", &ins->declaration.dst, true, ""); break; case VKD3DSIH_DCL_INPUT_PRIMITIVE: @@ -1885,14 +1917,12 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case 
VKD3DSIH_DCL_RESOURCE_RAW: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); break; case VKD3DSIH_DCL_RESOURCE_STRUCTURED: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); break; @@ -1916,29 +1946,24 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, break; case VKD3DSIH_DCL_TESSELLATOR_DOMAIN: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_tessellator_domain(compiler, ins->declaration.tessellator_domain); + shader_print_tessellator_domain(compiler, " ", ins->declaration.tessellator_domain, ""); break; case VKD3DSIH_DCL_TESSELLATOR_OUTPUT_PRIMITIVE: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_tessellator_output_primitive(compiler, ins->declaration.tessellator_output_primitive); + shader_print_tessellator_output_primitive(compiler, " ", ins->declaration.tessellator_output_primitive, ""); break; case VKD3DSIH_DCL_TESSELLATOR_PARTITIONING: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_tessellator_partitioning(compiler, ins->declaration.tessellator_partitioning); + shader_print_tessellator_partitioning(compiler, " ", ins->declaration.tessellator_partitioning, ""); break; case VKD3DSIH_DCL_TGSM_RAW: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.tgsm_raw.reg, true); + shader_print_dst_param(compiler, " ", 
&ins->declaration.tgsm_raw.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_raw.byte_count, ""); break; case VKD3DSIH_DCL_TGSM_STRUCTURED: - vkd3d_string_buffer_printf(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.tgsm_structured.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.tgsm_structured.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.byte_stride, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.tgsm_structured.structure_count, ""); break; @@ -1951,15 +1976,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, case VKD3DSIH_DCL_UAV_RAW: shader_dump_uav_flags(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.raw_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.raw_resource.resource.reg, true, ""); shader_dump_register_space(compiler, ins->declaration.raw_resource.resource.range.space); break; case VKD3DSIH_DCL_UAV_STRUCTURED: shader_dump_uav_flags(compiler, ins->flags); - shader_addline(buffer, " "); - shader_dump_dst_param(compiler, &ins->declaration.structured_resource.resource.reg, true); + shader_print_dst_param(compiler, " ", &ins->declaration.structured_resource.resource.reg, true, ""); shader_print_uint_literal(compiler, ", ", ins->declaration.structured_resource.byte_stride, ""); shader_dump_register_space(compiler, ins->declaration.structured_resource.resource.range.space); break; @@ -2021,15 +2044,13 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, for (i = 0; i < ins->dst_count; ++i) { shader_dump_ins_modifiers(compiler, &ins->dst[i]); - shader_addline(buffer, !i ? " " : ", "); - shader_dump_dst_param(compiler, &ins->dst[i], false); + shader_print_dst_param(compiler, !i ? 
" " : ", ", &ins->dst[i], false, ""); } /* Other source tokens */ for (i = ins->dst_count; i < (ins->dst_count + ins->src_count); ++i) { - shader_addline(buffer, !i ? " " : ", "); - shader_dump_src_param(compiler, &ins->src[i - ins->dst_count]); + shader_print_src_param(compiler, !i ? " " : ", ", &ins->src[i - ins->dst_count], ""); } break; } @@ -2037,21 +2058,186 @@ static void shader_dump_instruction(struct vkd3d_d3d_asm_compiler *compiler, shader_addline(buffer, "\n"); } -enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, +static const char *get_sysval_semantic_name(enum vkd3d_shader_sysval_semantic semantic) +{ + switch (semantic) + { + case VKD3D_SHADER_SV_NONE: return "NONE"; + case VKD3D_SHADER_SV_POSITION: return "POS"; + case VKD3D_SHADER_SV_CLIP_DISTANCE: return "CLIPDST"; + case VKD3D_SHADER_SV_CULL_DISTANCE: return "CULLDST"; + case VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX: return "RTINDEX"; + case VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX: return "VPINDEX"; + case VKD3D_SHADER_SV_VERTEX_ID: return "VERTID"; + case VKD3D_SHADER_SV_PRIMITIVE_ID: return "PRIMID"; + case VKD3D_SHADER_SV_INSTANCE_ID: return "INSTID"; + case VKD3D_SHADER_SV_IS_FRONT_FACE: return "FFACE"; + case VKD3D_SHADER_SV_SAMPLE_INDEX: return "SAMPLE"; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADEDGE: return "QUADEDGE"; + case VKD3D_SHADER_SV_TESS_FACTOR_QUADINT: return "QUADINT"; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIEDGE: return "TRIEDGE"; + case VKD3D_SHADER_SV_TESS_FACTOR_TRIINT: return "TRIINT"; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDET: return "LINEDET"; + case VKD3D_SHADER_SV_TESS_FACTOR_LINEDEN: return "LINEDEN"; + case VKD3D_SHADER_SV_TARGET: return "TARGET"; + case VKD3D_SHADER_SV_DEPTH: return "DEPTH"; + case VKD3D_SHADER_SV_COVERAGE: return "COVERAGE"; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "DEPTHGE"; + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "DEPTHLE"; + case VKD3D_SHADER_SV_STENCIL_REF: return "STENCILREF"; + default: return "??"; + } 
+} + +static const char *get_component_type_name(enum vkd3d_shader_component_type type) +{ + switch (type) + { + case VKD3D_SHADER_COMPONENT_VOID: return "void"; + case VKD3D_SHADER_COMPONENT_UINT: return "uint"; + case VKD3D_SHADER_COMPONENT_INT: return "int"; + case VKD3D_SHADER_COMPONENT_FLOAT: return "float"; + case VKD3D_SHADER_COMPONENT_BOOL: return "bool"; + case VKD3D_SHADER_COMPONENT_DOUBLE: return "double"; + case VKD3D_SHADER_COMPONENT_UINT64: return "uint64"; + default: return "??"; + } +} + +static const char *get_minimum_precision_name(enum vkd3d_shader_minimum_precision prec) +{ + switch (prec) + { + case VKD3D_SHADER_MINIMUM_PRECISION_NONE: return "NONE"; + case VKD3D_SHADER_MINIMUM_PRECISION_FLOAT_16: return "FLOAT_16"; + case VKD3D_SHADER_MINIMUM_PRECISION_FIXED_8_2: return "FIXED_8_2"; + case VKD3D_SHADER_MINIMUM_PRECISION_INT_16: return "INT_16"; + case VKD3D_SHADER_MINIMUM_PRECISION_UINT_16: return "UINT_16"; + default: return "??"; + } +} + +static const char *get_semantic_register_name(enum vkd3d_shader_sysval_semantic semantic) +{ + switch (semantic) + { + case VKD3D_SHADER_SV_DEPTH: return "oDepth"; + case VKD3D_SHADER_SV_DEPTH_GREATER_EQUAL: return "oDepthGE"; + case VKD3D_SHADER_SV_DEPTH_LESS_EQUAL: return "oDepthLE"; + /* SV_Coverage has name vCoverage when used as an input, + * but it doesn't appear in the signature in that case.
*/ + case VKD3D_SHADER_SV_COVERAGE: return "oMask"; + case VKD3D_SHADER_SV_STENCIL_REF: return "oStencilRef"; + default: return "??"; + } +} + +static enum vkd3d_result dump_signature(struct vkd3d_d3d_asm_compiler *compiler, + const char *name, const char *register_name, const struct shader_signature *signature) +{ + struct vkd3d_string_buffer *buffer = &compiler->buffer; + unsigned int i; + + if (signature->element_count == 0) + return VKD3D_OK; + + vkd3d_string_buffer_printf(buffer, "%s%s%s\n", + compiler->colours.opcode, name, compiler->colours.reset); + + for (i = 0; i < signature->element_count; ++i) + { + struct signature_element *element = &signature->elements[i]; + + vkd3d_string_buffer_printf(buffer, "%s.param%s %s", compiler->colours.opcode, + compiler->colours.reset, element->semantic_name); + + if (element->semantic_index != 0) + vkd3d_string_buffer_printf(buffer, "%u", element->semantic_index); + + if (element->register_index != -1) + { + shader_print_write_mask(compiler, "", element->mask, ""); + vkd3d_string_buffer_printf(buffer, ", %s%s%d%s", compiler->colours.reg, + register_name, element->register_index, compiler->colours.reset); + shader_print_write_mask(compiler, "", element->used_mask, ""); + } + else + { + vkd3d_string_buffer_printf(buffer, ", %s%s%s", compiler->colours.reg, + get_semantic_register_name(element->sysval_semantic), compiler->colours.reset); + } + + if (!element->component_type && !element->sysval_semantic + && !element->min_precision && !element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", %s", + get_component_type_name(element->component_type)); + + if (!element->sysval_semantic && !element->min_precision && !element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", %s", + get_sysval_semantic_name(element->sysval_semantic)); + + if (!element->min_precision && !element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", %s", + 
get_minimum_precision_name(element->min_precision)); + + if (!element->stream_index) + goto done; + + vkd3d_string_buffer_printf(buffer, ", m%u", + element->stream_index); + + done: + vkd3d_string_buffer_printf(buffer, "\n"); + } + + return VKD3D_OK; +} + +static enum vkd3d_result dump_signatures(struct vkd3d_d3d_asm_compiler *compiler, + const struct vsir_program *program) +{ + enum vkd3d_result ret; + + if ((ret = dump_signature(compiler, ".input", + program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vicp" : "v", + &program->input_signature)) < 0) + return ret; + + if ((ret = dump_signature(compiler, ".output", "o", + &program->output_signature)) < 0) + return ret; + + if ((ret = dump_signature(compiler, ".patch_constant", + program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN ? "vpc" : "o", + &program->patch_constant_signature)) < 0) + return ret; + + vkd3d_string_buffer_printf(&compiler->buffer, "%s.text%s\n", + compiler->colours.opcode, compiler->colours.reset); + + return VKD3D_OK; +} + +enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect) + struct vkd3d_shader_code *out, enum vsir_asm_flags flags) { const struct vkd3d_shader_version *shader_version = &program->shader_version; enum vkd3d_shader_compile_option_formatting_flags formatting; struct vkd3d_d3d_asm_compiler compiler = { - .dialect = dialect, + .flags = flags, }; enum vkd3d_result result = VKD3D_OK; struct vkd3d_string_buffer *buffer; unsigned int indent, i, j; const char *indent_str; - void *code; static const struct vkd3d_d3d_asm_colours no_colours = { @@ -2064,6 +2250,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, .swizzle = "", .version = "", .write_mask = "", + .label = "", }; static const struct vkd3d_d3d_asm_colours colours = { @@ -2076,6 +2263,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct 
vsir_program *program, .swizzle = "\x1b[93m", .version = "\x1b[36m", .write_mask = "\x1b[93m", + .label = "\x1b[91m", }; formatting = VKD3D_SHADER_COMPILE_OPTION_FORMATTING_INDENT @@ -2109,6 +2297,17 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, shader_get_type_prefix(shader_version->type), shader_version->major, shader_version->minor, compiler.colours.reset); + /* The signatures we emit only make sense for DXBC shaders. D3DBC + * doesn't even have an explicit concept of signature. */ + if (formatting & VKD3D_SHADER_COMPILE_OPTION_FORMATTING_IO_SIGNATURES && shader_version->major >= 4) + { + if ((result = dump_signatures(&compiler, program)) < 0) + { + vkd3d_string_buffer_cleanup(buffer); + return result; + } + } + indent = 0; for (i = 0; i < program->instructions.count; ++i) { @@ -2124,6 +2323,10 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, --indent; break; + case VKD3DSIH_LABEL: + indent = 0; + break; + default: break; } @@ -2142,6 +2345,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, case VKD3DSIH_IFC: case VKD3DSIH_LOOP: case VKD3DSIH_SWITCH: + case VKD3DSIH_LABEL: ++indent; break; @@ -2150,18 +2354,7 @@ enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct vsir_program *program, } } - if ((code = vkd3d_malloc(buffer->content_size))) - { - memcpy(code, buffer->buffer, buffer->content_size); - out->size = buffer->content_size; - out->code = code; - } - else - { - result = VKD3D_ERROR_OUT_OF_MEMORY; - } - - vkd3d_string_buffer_cleanup(buffer); + vkd3d_shader_code_from_string_buffer(out, buffer); return result; } @@ -2171,7 +2364,7 @@ void vkd3d_shader_trace(const struct vsir_program *program) const char *p, *q, *end; struct vkd3d_shader_code code; - if (vkd3d_dxbc_binary_to_text(program, NULL, &code, VSIR_ASM_VSIR) != VKD3D_OK) + if (d3d_asm_compile(program, NULL, &code, VSIR_ASM_FLAG_DUMP_TYPES) != VKD3D_OK) return; end = (const char *)code.code + 
code.size; diff --git a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c index 27f5c810436..3b935b07d61 100644 --- a/libs/vkd3d/libs/vkd3d-shader/d3dbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/d3dbc.c @@ -541,9 +541,9 @@ static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool outp struct signature_element *element; if (output) - signature = &sm1->p.shader_desc.output_signature; + signature = &sm1->p.program.output_signature; else - signature = &sm1->p.shader_desc.input_signature; + signature = &sm1->p.program.input_signature; if ((element = find_signature_element(signature, name, index))) { @@ -581,9 +581,9 @@ static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output, struct signature_element *element; if (output) - signature = &sm1->p.shader_desc.output_signature; + signature = &sm1->p.program.output_signature; else - signature = &sm1->p.shader_desc.input_signature; + signature = &sm1->p.program.input_signature; if (!(element = find_signature_element_by_register_index(signature, register_index))) { @@ -886,7 +886,6 @@ static void shader_sm1_destroy(struct vkd3d_shader_parser *parser) struct vkd3d_shader_sm1_parser *sm1 = vkd3d_shader_sm1_parser(parser); vsir_program_cleanup(&parser->program); - free_shader_desc(&sm1->p.shader_desc); vkd3d_free(sm1); } @@ -1237,7 +1236,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; const uint32_t *code = compile_info->source.code; size_t code_size = compile_info->source.size; - struct vkd3d_shader_desc *shader_desc; struct vkd3d_shader_version version; uint16_t shader_type; size_t token_count; @@ -1290,9 +1288,6 @@ static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1, if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version, &shader_sm1_parser_ops, code_size != ~(size_t)0 ? 
token_count / 4u + 4 : 16)) return VKD3D_ERROR_OUT_OF_MEMORY; - shader_desc = &sm1->p.shader_desc; - shader_desc->byte_code = code; - shader_desc->byte_code_size = code_size; sm1->ptr = sm1->start; return VKD3D_OK; @@ -1363,7 +1358,7 @@ int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compi sm1->p.shader_desc.flat_constant_count[i].external = get_external_constant_count(sm1, i); if (!sm1->p.failed) - ret = vsir_validate(&sm1->p); + ret = vkd3d_shader_parser_validate(&sm1->p); if (sm1->p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; @@ -1499,12 +1494,12 @@ static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, uns return D3DPS_VERSION(major, minor); } -static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) +D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type) { switch (type->class) { case HLSL_CLASS_ARRAY: - return sm1_class(type->e.array.type); + return hlsl_sm1_class(type->e.array.type); case HLSL_CLASS_MATRIX: assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK); if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR) @@ -1525,7 +1520,7 @@ static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type) } } -static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type) +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type) { switch (type->base_type) { @@ -1620,7 +1615,7 @@ static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_typ } } - type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(array_type))); + type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type))); put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx)); put_u32(buffer, vkd3d_make_u32(array_size, field_count)); put_u32(buffer, fields_offset); @@ -1977,16 +1972,13 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b { case HLSL_TYPE_INT: case HLSL_TYPE_UINT: - /* 
Integers are internally represented as floats, so no change is necessary.*/ + case HLSL_TYPE_BOOL: + /* Integrals are internally represented as floats, so no change is necessary.*/ case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); break; - case HLSL_TYPE_BOOL: - hlsl_fixme(ctx, &instr->loc, "SM1 cast from bool to float."); - break; - case HLSL_TYPE_DOUBLE: hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to float."); break; @@ -2002,7 +1994,10 @@ static void write_sm1_cast(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b { case HLSL_TYPE_HALF: case HLSL_TYPE_FLOAT: - /* A compilation pass applies a FLOOR operation to casts to int, so no change is necessary. */ + /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not + * reach this case unless we are missing something. */ + hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer."); + break; case HLSL_TYPE_INT: case HLSL_TYPE_UINT: write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); @@ -2067,6 +2062,9 @@ static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_b D3DDECLUSAGE usage; bool ret; + if ((!output && !var->last_read) || (output && !var->first_write)) + return; + if (hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg)) { usage = 0; @@ -2242,6 +2240,12 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b assert(instr->reg.allocated); + if (expr->op == HLSL_OP1_REINTERPRET) + { + write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, 0); + return; + } + if (expr->op == HLSL_OP1_CAST) { write_sm1_cast(ctx, buffer, instr); @@ -2329,7 +2333,15 @@ static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *b } break; + case HLSL_OP2_SLT: + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + hlsl_fixme(ctx, &instr->loc, "Lower SLT instructions for pixel shaders."); +
write_sm1_binary_op(ctx, buffer, D3DSIO_SLT, &instr->reg, &arg1->reg, &arg2->reg); + break; + case HLSL_OP3_CMP: + if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) + hlsl_fixme(ctx, &instr->loc, "Lower CMP instructions for vertex shaders."); write_sm1_ternary_op(ctx, buffer, D3DSIO_CMP, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg); break; @@ -2488,7 +2500,7 @@ static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer * if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX) { - FIXME("Matrix writemasks need to be lowered.\n"); + hlsl_fixme(ctx, &instr->loc, "Lower matrix writemasks.\n"); return; } diff --git a/libs/vkd3d/libs/vkd3d-shader/dxbc.c b/libs/vkd3d/libs/vkd3d-shader/dxbc.c index 37ebc73c099..8a1012d909b 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxbc.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxbc.c @@ -150,7 +150,7 @@ static const char *shader_get_string(const char *data, size_t data_size, size_t } static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, - const char *source_name, struct vkd3d_shader_dxbc_desc *desc) + const char *source_name, uint32_t flags, struct vkd3d_shader_dxbc_desc *desc) { const struct vkd3d_shader_location location = {.source_name = source_name}; struct vkd3d_shader_dxbc_section_desc *sections, *section; @@ -186,17 +186,20 @@ static int parse_dxbc(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_ checksum[1] = read_u32(&ptr); checksum[2] = read_u32(&ptr); checksum[3] = read_u32(&ptr); - vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); - if (memcmp(checksum, calculated_checksum, sizeof(checksum))) - { - WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " - "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", - checksum[0], checksum[1], checksum[2], checksum[3], - calculated_checksum[0], calculated_checksum[1], - calculated_checksum[2], calculated_checksum[3]); - vkd3d_shader_error(message_context, 
&location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, - "Invalid DXBC checksum."); - return VKD3D_ERROR_INVALID_ARGUMENT; + if (!(flags & VKD3D_SHADER_PARSE_DXBC_IGNORE_CHECKSUM)) + { + vkd3d_compute_dxbc_checksum(data, data_size, calculated_checksum); + if (memcmp(checksum, calculated_checksum, sizeof(checksum))) + { + WARN("Checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x} does not match " + "calculated checksum {0x%08x, 0x%08x, 0x%08x, 0x%08x}.\n", + checksum[0], checksum[1], checksum[2], checksum[3], + calculated_checksum[0], calculated_checksum[1], + calculated_checksum[2], calculated_checksum[3]); + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_DXBC_INVALID_CHECKSUM, + "Invalid DXBC checksum."); + return VKD3D_ERROR_INVALID_ARGUMENT; + } } version = read_u32(&ptr); @@ -287,7 +290,7 @@ static int for_each_dxbc_section(const struct vkd3d_shader_code *dxbc, unsigned int i; int ret; - if ((ret = parse_dxbc(dxbc, message_context, source_name, &desc)) < 0) + if ((ret = parse_dxbc(dxbc, message_context, source_name, 0, &desc)) < 0) return ret; for (i = 0; i < desc.section_count; ++i) @@ -313,7 +316,7 @@ int vkd3d_shader_parse_dxbc(const struct vkd3d_shader_code *dxbc, *messages = NULL; vkd3d_shader_message_context_init(&message_context, VKD3D_SHADER_LOG_INFO); - ret = parse_dxbc(dxbc, &message_context, NULL, desc); + ret = parse_dxbc(dxbc, &message_context, NULL, flags, desc); vkd3d_shader_message_context_trace_messages(&message_context); if (!vkd3d_shader_message_context_copy_messages(&message_context, messages) && ret >= 0) @@ -485,7 +488,7 @@ int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, static int shdr_handler(const struct vkd3d_shader_dxbc_section_desc *section, struct vkd3d_shader_message_context *message_context, void *context) { - struct vkd3d_shader_desc *desc = context; + struct dxbc_shader_desc *desc = context; int ret; switch (section->tag) @@ -550,7 +553,7 @@ static int shdr_handler(const struct 
vkd3d_shader_dxbc_section_desc *section, return VKD3D_OK; } -void free_shader_desc(struct vkd3d_shader_desc *desc) +void free_dxbc_shader_desc(struct dxbc_shader_desc *desc) { shader_signature_cleanup(&desc->input_signature); shader_signature_cleanup(&desc->output_signature); @@ -558,7 +561,7 @@ void free_shader_desc(struct vkd3d_shader_desc *desc) } int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc) + struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc) { int ret; @@ -569,7 +572,7 @@ int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, if (ret < 0) { WARN("Failed to parse shader, vkd3d result %d.\n", ret); - free_shader_desc(desc); + free_dxbc_shader_desc(desc); } return ret; diff --git a/libs/vkd3d/libs/vkd3d-shader/dxil.c b/libs/vkd3d/libs/vkd3d-shader/dxil.c index 26a8a5c1cc3..7f9a74fa737 100644 --- a/libs/vkd3d/libs/vkd3d-shader/dxil.c +++ b/libs/vkd3d/libs/vkd3d-shader/dxil.c @@ -342,6 +342,8 @@ enum dx_intrinsic_opcode { DX_LOAD_INPUT = 4, DX_STORE_OUTPUT = 5, + DX_FABS = 6, + DX_SATURATE = 7, DX_ISNAN = 8, DX_ISINF = 9, DX_ISFINITE = 10, @@ -374,6 +376,10 @@ enum dx_intrinsic_opcode DX_IMIN = 38, DX_UMAX = 39, DX_UMIN = 40, + DX_FMAD = 46, + DX_FMA = 47, + DX_IMAD = 48, + DX_UMAD = 49, DX_IBFE = 51, DX_UBFE = 52, DX_CREATE_HANDLE = 57, @@ -388,9 +394,13 @@ enum dx_intrinsic_opcode DX_TEXTURE_STORE = 67, DX_BUFFER_LOAD = 68, DX_BUFFER_STORE = 69, + DX_BUFFER_UPDATE_COUNTER = 70, DX_GET_DIMENSIONS = 72, + DX_TEXTURE_GATHER = 73, + DX_TEXTURE_GATHER_CMP = 74, DX_ATOMIC_BINOP = 78, DX_ATOMIC_CMP_XCHG = 79, + DX_BARRIER = 80, DX_DERIV_COARSEX = 83, DX_DERIV_COARSEY = 84, DX_DERIV_FINEX = 85, @@ -463,6 +473,14 @@ enum dxil_atomic_binop_code ATOMIC_BINOP_INVALID, }; +enum dxil_sync_flags +{ + SYNC_THREAD_GROUP = 0x1, + SYNC_GLOBAL_UAV = 0x2, + SYNC_THREAD_GROUP_UAV = 
0x4, + SYNC_GROUP_SHARED_MEMORY = 0x8, +}; + struct sm6_pointer_info { const struct sm6_type *type; @@ -541,6 +559,7 @@ struct sm6_value { const struct sm6_type *type; enum sm6_value_type value_type; + unsigned int structure_stride; bool is_undefined; union { @@ -753,6 +772,7 @@ struct sm6_parser unsigned int indexable_temp_count; unsigned int icb_count; + unsigned int tgsm_count; struct sm6_value *values; size_t value_count; @@ -2110,6 +2130,15 @@ static inline bool sm6_value_is_undef(const struct sm6_value *value) return sm6_value_is_register(value) && value->u.reg.type == VKD3DSPR_UNDEF; } +static bool sm6_value_vector_is_constant_or_undef(const struct sm6_value **values, unsigned int count) +{ + unsigned int i; + for (i = 0; i < count; ++i) + if (!sm6_value_is_constant(values[i]) && !sm6_value_is_undef(values[i])) + return false; + return true; +} + static bool sm6_value_is_icb(const struct sm6_value *value) { return value->value_type == VALUE_TYPE_ICB; @@ -2199,6 +2228,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type return VKD3D_DATA_BOOL; case 8: return VKD3D_DATA_UINT8; + case 16: + return VKD3D_DATA_UINT16; case 32: return VKD3D_DATA_UINT; case 64: @@ -2212,6 +2243,8 @@ static enum vkd3d_data_type vkd3d_data_type_from_sm6_type(const struct sm6_type { switch (type->u.width) { + case 16: + return VKD3D_DATA_HALF; case 32: return VKD3D_DATA_FLOAT; case 64: @@ -2252,6 +2285,12 @@ static void register_init_ssa_scalar(struct vkd3d_shader_register *reg, const st register_init_ssa_vector(reg, sm6_type_get_scalar_type(type, 0), 1, value, sm6); } +static void register_make_constant_uint(struct vkd3d_shader_register *reg, unsigned int value) +{ + vsir_register_init(reg, VKD3DSPR_IMMCONST, VKD3D_DATA_UINT, 0); + reg->u.immconst_u32[0] = value; +} + static void dst_param_init(struct vkd3d_shader_dst_param *param) { param->write_mask = VKD3DSP_WRITEMASK_0; @@ -2315,6 +2354,12 @@ static void src_param_init_vector_from_reg(struct 
vkd3d_shader_src_param *param, param->reg = *reg; } +static void src_param_make_constant_uint(struct vkd3d_shader_src_param *param, unsigned int value) +{ + src_param_init(param); + register_make_constant_uint(&param->reg, value); +} + static void register_index_address_init(struct vkd3d_shader_register_index *idx, const struct sm6_value *address, struct sm6_parser *sm6) { @@ -2336,14 +2381,18 @@ static void register_index_address_init(struct vkd3d_shader_register_index *idx, } } -static void instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) +static bool instruction_dst_param_init_ssa_scalar(struct vkd3d_shader_instruction *ins, struct sm6_parser *sm6) { - struct vkd3d_shader_dst_param *param = instruction_dst_params_alloc(ins, 1, sm6); struct sm6_value *dst = sm6_parser_get_current_value(sm6); + struct vkd3d_shader_dst_param *param; + + if (!(param = instruction_dst_params_alloc(ins, 1, sm6))) + return false; dst_param_init_ssa_scalar(param, dst->type, dst, sm6); param->write_mask = VKD3DSP_WRITEMASK_0; dst->u.reg = param->reg; + return true; } static void instruction_dst_param_init_ssa_vector(struct vkd3d_shader_instruction *ins, @@ -2876,7 +2925,7 @@ static enum vkd3d_result sm6_parser_constants_init(struct sm6_parser *sm6, const } if (type->u.width == 16) - FIXME("Half float type is not supported yet.\n"); + dst->u.reg.u.immconst_u32[0] = record->operands[0]; else if (type->u.width == 32) dst->u.reg.u.immconst_f32[0] = bitcast_uint64_to_float(record->operands[0]); else if (type->u.width == 64) @@ -2994,6 +3043,58 @@ static void sm6_parser_declare_indexable_temp(struct sm6_parser *sm6, const stru register_init_with_id(&dst->u.reg, VKD3DSPR_IDXTEMP, data_type, ins->declaration.indexable_temp.register_idx); } +static void sm6_parser_declare_tgsm_raw(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type =
vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int byte_count; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_RAW); + dst_param_init(&ins->declaration.tgsm_raw.reg); + register_init_with_id(&ins->declaration.tgsm_raw.reg.reg, VKD3DSPR_GROUPSHAREDMEM, data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_raw.reg.reg; + dst->structure_stride = 0; + ins->declaration.tgsm_raw.alignment = alignment; + byte_count = elem_type->u.width / 8u; + if (byte_count != 4) + { + FIXME("Unsupported byte count %u.\n", byte_count); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Raw TGSM byte count %u is not supported.", byte_count); + } + ins->declaration.tgsm_raw.byte_count = byte_count; + /* The initialiser value index will be resolved later when forward references can be handled. */ + ins->flags = init; +} + +static void sm6_parser_declare_tgsm_structured(struct sm6_parser *sm6, const struct sm6_type *elem_type, + unsigned int count, unsigned int alignment, unsigned int init, struct sm6_value *dst) +{ + enum vkd3d_data_type data_type = vkd3d_data_type_from_sm6_type(elem_type); + struct vkd3d_shader_instruction *ins; + unsigned int structure_stride; + + ins = sm6_parser_add_instruction(sm6, VKD3DSIH_DCL_TGSM_STRUCTURED); + dst_param_init(&ins->declaration.tgsm_structured.reg); + register_init_with_id(&ins->declaration.tgsm_structured.reg.reg, VKD3DSPR_GROUPSHAREDMEM, + data_type, sm6->tgsm_count++); + dst->u.reg = ins->declaration.tgsm_structured.reg.reg; + structure_stride = elem_type->u.width / 8u; + if (structure_stride != 4) + { + FIXME("Unsupported structure stride %u.\n", structure_stride); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Structured TGSM byte stride %u is not supported.", structure_stride); + } + dst->structure_stride = structure_stride; + ins->declaration.tgsm_structured.alignment = alignment; + 
ins->declaration.tgsm_structured.byte_stride = structure_stride; + ins->declaration.tgsm_structured.structure_count = count; + /* The initialiser value index will be resolved later when forward references can be handled. */ + ins->flags = init; +} + static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_record *record) { const struct sm6_type *type, *scalar_type; @@ -3119,10 +3220,17 @@ static bool sm6_parser_declare_global(struct sm6_parser *sm6, const struct dxil_ } else if (address_space == ADDRESS_SPACE_GROUPSHARED) { - FIXME("Unsupported TGSM.\n"); - vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, - "TGSM global variables are not supported."); - return false; + if (!sm6_type_is_numeric(scalar_type)) + { + WARN("Unsupported type class %u.\n", scalar_type->class); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM variables of type class %u are not supported.", scalar_type->class); + return false; + } + if (count == 1) + sm6_parser_declare_tgsm_raw(sm6, scalar_type, alignment, init, dst); + else + sm6_parser_declare_tgsm_structured(sm6, scalar_type, count, alignment, init, dst); } else { @@ -3158,6 +3266,38 @@ static const struct vkd3d_shader_immediate_constant_buffer *resolve_forward_init return NULL; } +static bool resolve_forward_zero_initialiser(size_t index, struct sm6_parser *sm6) +{ + const struct sm6_value *value; + + if (!index) + return false; + + --index; + if (!(value = sm6_parser_get_value_safe(sm6, index)) + || (!sm6_value_is_icb(value) && !sm6_value_is_constant(value) && !sm6_value_is_undef(value))) + { + WARN("Invalid initialiser index %zu.\n", index); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "TGSM initialiser value index %zu is invalid.", index); + return false; + } + else if ((sm6_value_is_icb(value) && value->u.icb->is_null) || sm6_value_is_constant_zero(value)) + { + return true; + } + else if 
(sm6_value_is_undef(value)) + { + /* In VSIR, initialisation with undefined values of objects is implied, not explicit. */ + return false; + } + + FIXME("Non-zero initialisers are not supported.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Non-zero TGSM initialisers are not supported."); + return false; +} + static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { size_t i, count, base_value_idx = sm6->value_count; @@ -3231,6 +3371,16 @@ static enum vkd3d_result sm6_parser_globals_init(struct sm6_parser *sm6) { ins->declaration.icb = resolve_forward_initialiser((uintptr_t)ins->declaration.icb, sm6); } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_RAW) + { + ins->declaration.tgsm_raw.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } + else if (ins->handler_idx == VKD3DSIH_DCL_TGSM_STRUCTURED) + { + ins->declaration.tgsm_structured.zero_init = resolve_forward_zero_initialiser(ins->flags, sm6); + ins->flags = 0; + } } for (i = base_value_idx; i < sm6->value_count; ++i) { @@ -3974,6 +4124,64 @@ static void sm6_parser_emit_dx_atomic_binop(struct sm6_parser *sm6, enum dx_intr dst->u.reg = dst_params[0].reg; } +static void sm6_parser_emit_dx_barrier(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + enum dxil_sync_flags flags; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_SYNC); + flags = sm6_value_get_constant_uint(operands[0]); + ins->flags = flags & (SYNC_THREAD_GROUP | SYNC_THREAD_GROUP_UAV); + if (flags & SYNC_GLOBAL_UAV) + ins->flags |= VKD3DSSF_GLOBAL_UAV; + if (flags & SYNC_GROUP_SHARED_MEMORY) + ins->flags |= VKD3DSSF_GROUP_SHARED_MEMORY; + if (flags &= ~(SYNC_THREAD_GROUP | SYNC_GLOBAL_UAV | SYNC_THREAD_GROUP_UAV | SYNC_GROUP_SHARED_MEMORY)) + { + FIXME("Unhandled flags %#x.\n", flags); + 
vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Barrier flags %#x are unhandled.", flags); + } +} + +static void sm6_parser_emit_dx_buffer_update_counter(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_params; + const struct sm6_value *resource; + unsigned int i; + int8_t inc; + + resource = operands[0]; + if (!sm6_value_validate_is_handle(resource, sm6)) + return; + + if (!sm6_value_is_constant(operands[1])) + { + FIXME("Unsupported dynamic update operand.\n"); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "A dynamic update value for a UAV counter operation is not supported."); + return; + } + i = sm6_value_get_constant_uint(operands[1]); + if (i != 1 && i != 255) + { + WARN("Unexpected update value %#x.\n", i); + vkd3d_shader_parser_warning(&sm6->p, VKD3D_SHADER_WARNING_DXIL_IGNORING_OPERANDS, + "Update value %#x for a UAV counter operation is not supported.", i); + } + inc = i; + + vsir_instruction_init(ins, &sm6->p.location, (inc < 0) ? 
VKD3DSIH_IMM_ATOMIC_CONSUME : VKD3DSIH_IMM_ATOMIC_ALLOC); + if (!(src_params = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_vector_from_reg(&src_params[0], &resource->u.handle.reg); + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_cbuffer_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4065,6 +4273,53 @@ static void sm6_parser_emit_dx_create_handle(struct sm6_parser *sm6, enum dx_int ins->handler_idx = VKD3DSIH_NOP; } +static void sm6_parser_emit_dx_fabs(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + src_param->modifiers = VKD3DSPSM_ABS; + + instruction_dst_param_init_ssa_scalar(ins, sm6); +} + +static enum vkd3d_shader_opcode sm6_dx_map_ma_op(enum dx_intrinsic_opcode op, const struct sm6_type *type) +{ + switch (op) + { + case DX_FMA: + return VKD3DSIH_DFMA; + case DX_FMAD: + return VKD3DSIH_MAD; + case DX_IMAD: + case DX_UMAD: + return VKD3DSIH_IMAD; + default: + vkd3d_unreachable(); + } +} + +static void sm6_parser_emit_dx_ma(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_params; + unsigned int i; + + vsir_instruction_init(ins, &sm6->p.location, sm6_dx_map_ma_op(op, operands[0]->type)); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + for (i = 0; i < 3; ++i) + src_param_init_from_value(&src_params[i], operands[i]); + + 
instruction_dst_param_init_ssa_scalar(ins, sm6); +} + static void sm6_parser_emit_dx_get_dimensions(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4182,7 +4437,7 @@ static void sm6_parser_emit_dx_load_input(struct sm6_parser *sm6, enum dx_intrin vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); - signature = &sm6->p.shader_desc.input_signature; + signature = &sm6->p.program.input_signature; if (row_index >= signature->element_count) { WARN("Invalid row index %u.\n", row_index); @@ -4521,6 +4776,21 @@ static void sm6_parser_emit_dx_sample(struct sm6_parser *sm6, enum dx_intrinsic_ instruction_dst_param_init_ssa_vector(ins, component_count, sm6); } +static void sm6_parser_emit_dx_saturate(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_instruction *ins = state->ins; + struct vkd3d_shader_src_param *src_param; + + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) + return; + src_param_init_from_value(src_param, operands[0]); + + if (instruction_dst_param_init_ssa_scalar(ins, sm6)) + ins->dst->modifiers = VKD3DSPDM_SATURATE; +} + static void sm6_parser_emit_dx_sincos(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4572,7 +4842,7 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr row_index = sm6_value_get_constant_uint(operands[0]); column_index = sm6_value_get_constant_uint(operands[2]); - signature = &sm6->p.shader_desc.output_signature; + signature = &sm6->p.program.output_signature; if (row_index >= signature->element_count) { WARN("Invalid row index %u.\n", row_index); @@ -4612,6 +4882,68 @@ static void sm6_parser_emit_dx_store_output(struct sm6_parser *sm6, enum dx_intr 
src_param_init_from_value(src_param, value); } +static void sm6_parser_emit_dx_texture_gather(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, + const struct sm6_value **operands, struct function_emission_state *state) +{ + struct vkd3d_shader_register coord, offset; + const struct sm6_value *resource, *sampler; + struct vkd3d_shader_src_param *src_params; + struct vkd3d_shader_instruction *ins; + unsigned int swizzle; + bool extended_offset; + + resource = operands[0]; + sampler = operands[1]; + if (!sm6_value_validate_is_texture_handle(resource, op, sm6) + || !sm6_value_validate_is_sampler_handle(sampler, op, sm6)) + { + return; + } + + if (!sm6_parser_emit_coordinate_construct(sm6, &operands[2], VKD3D_VEC4_SIZE, NULL, state, &coord)) + return; + + if ((extended_offset = !sm6_value_vector_is_constant_or_undef(&operands[6], 2)) + && !sm6_parser_emit_coordinate_construct(sm6, &operands[6], 2, NULL, state, &offset)) + { + return; + } + + ins = state->ins; + if (op == DX_TEXTURE_GATHER) + { + instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO : VKD3DSIH_GATHER4, resource, sm6); + if (!(src_params = instruction_src_params_alloc(ins, 3 + extended_offset, sm6))) + return; + } + else + { + instruction_init_with_resource(ins, extended_offset ? VKD3DSIH_GATHER4_PO_C : VKD3DSIH_GATHER4_C, resource, sm6); + if (!(src_params = instruction_src_params_alloc(ins, 4 + extended_offset, sm6))) + return; + src_param_init_from_value(&src_params[3 + extended_offset], operands[9]); + } + + src_param_init_vector_from_reg(&src_params[0], &coord); + if (extended_offset) + src_param_init_vector_from_reg(&src_params[1], &offset); + else + instruction_set_texel_offset(ins, &operands[6], sm6); + src_param_init_vector_from_reg(&src_params[1 + extended_offset], &resource->u.handle.reg); + src_param_init_vector_from_reg(&src_params[2 + extended_offset], &sampler->u.handle.reg); + /* Swizzle stored in the sampler parameter is the scalar component index to be gathered. 
*/ + swizzle = sm6_value_get_constant_uint(operands[8]); + if (swizzle >= VKD3D_VEC4_SIZE) + { + WARN("Invalid swizzle %#x.\n", swizzle); + vkd3d_shader_parser_error(&sm6->p, VKD3D_SHADER_ERROR_DXIL_INVALID_OPERAND, + "Swizzle %#x for a texture gather operation is invalid.", swizzle); + } + src_params[2 + extended_offset].swizzle = swizzle; + + instruction_dst_param_init_ssa_vector(ins, VKD3D_VEC4_SIZE, sm6); +} + static void sm6_parser_emit_dx_texture_load(struct sm6_parser *sm6, enum dx_intrinsic_opcode op, const struct sm6_value **operands, struct function_emission_state *state) { @@ -4741,9 +5073,11 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_ATAN ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_ATOMIC_BINOP ] = {"o", "HciiiR", sm6_parser_emit_dx_atomic_binop}, [DX_ATOMIC_CMP_XCHG ] = {"o", "HiiiRR", sm6_parser_emit_dx_atomic_binop}, + [DX_BARRIER ] = {"v", "c", sm6_parser_emit_dx_barrier}, [DX_BFREV ] = {"m", "R", sm6_parser_emit_dx_unary}, [DX_BUFFER_LOAD ] = {"o", "Hii", sm6_parser_emit_dx_buffer_load}, [DX_BUFFER_STORE ] = {"v", "Hiiooooc", sm6_parser_emit_dx_buffer_store}, + [DX_BUFFER_UPDATE_COUNTER ] = {"i", "H8", sm6_parser_emit_dx_buffer_update_counter}, [DX_CBUFFER_LOAD_LEGACY ] = {"o", "Hi", sm6_parser_emit_dx_cbuffer_load}, [DX_COS ] = {"g", "R", sm6_parser_emit_dx_sincos}, [DX_COUNT_BITS ] = {"i", "m", sm6_parser_emit_dx_unary}, @@ -4753,9 +5087,12 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_DERIV_FINEX ] = {"e", "R", sm6_parser_emit_dx_unary}, [DX_DERIV_FINEY ] = {"e", "R", sm6_parser_emit_dx_unary}, [DX_EXP ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_FABS ] = {"g", "R", sm6_parser_emit_dx_fabs}, [DX_FIRST_BIT_HI ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_LO ] = {"i", "m", sm6_parser_emit_dx_unary}, [DX_FIRST_BIT_SHI ] = {"i", "m", sm6_parser_emit_dx_unary}, + [DX_FMA ] = {"g", "RRR", sm6_parser_emit_dx_ma}, + [DX_FMAD ] = {"g", "RRR", sm6_parser_emit_dx_ma}, [DX_FMAX ] = {"g", "RR", 
sm6_parser_emit_dx_binary}, [DX_FMIN ] = {"g", "RR", sm6_parser_emit_dx_binary}, [DX_FRC ] = {"g", "R", sm6_parser_emit_dx_unary}, @@ -4764,6 +5101,7 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_HCOS ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HSIN ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_HTAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_IMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, [DX_IMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_IMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_ISFINITE ] = {"1", "g", sm6_parser_emit_dx_unary}, @@ -4786,14 +5124,18 @@ static const struct sm6_dx_opcode_info sm6_dx_op_table[] = [DX_SAMPLE_C_LZ ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, [DX_SAMPLE_GRAD ] = {"o", "HHffffiiifffffff", sm6_parser_emit_dx_sample}, [DX_SAMPLE_LOD ] = {"o", "HHffffiiif", sm6_parser_emit_dx_sample}, + [DX_SATURATE ] = {"g", "R", sm6_parser_emit_dx_saturate}, [DX_SIN ] = {"g", "R", sm6_parser_emit_dx_sincos}, [DX_SPLIT_DOUBLE ] = {"S", "d", sm6_parser_emit_dx_split_double}, [DX_SQRT ] = {"g", "R", sm6_parser_emit_dx_unary}, [DX_STORE_OUTPUT ] = {"v", "ii8o", sm6_parser_emit_dx_store_output}, [DX_TAN ] = {"g", "R", sm6_parser_emit_dx_unary}, + [DX_TEXTURE_GATHER ] = {"o", "HHffffiic", sm6_parser_emit_dx_texture_gather}, + [DX_TEXTURE_GATHER_CMP ] = {"o", "HHffffiicf", sm6_parser_emit_dx_texture_gather}, [DX_TEXTURE_LOAD ] = {"o", "HiiiiCCC", sm6_parser_emit_dx_texture_load}, [DX_TEXTURE_STORE ] = {"v", "Hiiiooooc", sm6_parser_emit_dx_texture_store}, [DX_UBFE ] = {"m", "iiR", sm6_parser_emit_dx_tertiary}, + [DX_UMAD ] = {"m", "RRR", sm6_parser_emit_dx_ma}, [DX_UMAX ] = {"m", "RR", sm6_parser_emit_dx_binary}, [DX_UMIN ] = {"m", "RR", sm6_parser_emit_dx_binary}, }; @@ -5055,7 +5397,10 @@ static enum vkd3d_shader_opcode sm6_map_cast_op(uint64_t code, const struct sm6_ break; case CAST_ZEXT: case CAST_SEXT: - /* nop or min precision. TODO: native 16-bit */ + /* nop or min precision. TODO: native 16-bit. 
+ * Extension instructions could be emitted for min precision, but in Windows + * the AMD RX 580 simply drops such instructions, which makes sense as no + * assumptions should be made about any behaviour which depends on bit width. */ if (to->u.width == from->u.width || (to->u.width == 32 && from->u.width == 16)) { op = VKD3DSIH_NOP; @@ -5187,8 +5532,8 @@ static const struct sm6_cmp_info *sm6_map_cmp2_op(uint64_t code) [FCMP_OLT] = {VKD3DSIH_LTO}, [FCMP_OLE] = {VKD3DSIH_GEO, true}, [FCMP_ONE] = {VKD3DSIH_NEO}, - [FCMP_ORD] = {VKD3DSIH_INVALID}, - [FCMP_UNO] = {VKD3DSIH_INVALID}, + [FCMP_ORD] = {VKD3DSIH_ORD}, + [FCMP_UNO] = {VKD3DSIH_UNO}, [FCMP_UEQ] = {VKD3DSIH_EQU}, [FCMP_UGT] = {VKD3DSIH_LTU, true}, [FCMP_UGE] = {VKD3DSIH_GEU}, @@ -5459,6 +5804,7 @@ static void sm6_parser_emit_gep(struct sm6_parser *sm6, const struct dxil_record register_index_address_init(®->idx[1], elem_value, sm6); reg->idx[1].is_in_bounds = is_in_bounds; reg->idx_count = 2; + dst->structure_stride = src->structure_stride; ins->handler_idx = VKD3DSIH_NOP; } @@ -5467,8 +5813,8 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { const struct sm6_type *elem_type = NULL, *pointee_type; - struct vkd3d_shader_src_param *src_param; - unsigned int alignment, i = 0; + unsigned int alignment, operand_count, i = 0; + struct vkd3d_shader_src_param *src_params; const struct sm6_value *ptr; uint64_t alignment_code; @@ -5505,12 +5851,34 @@ static void sm6_parser_emit_load(struct sm6_parser *sm6, const struct dxil_recor if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_LD_STRUCTURED); - if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; - src_param_init_from_value(&src_param[0], 
ptr); - src_param->reg.alignment = alignment; + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], ptr); + src_params[2].reg.alignment = alignment; + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_LD_RAW : VKD3DSIH_MOV); + + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], ptr); + src_params[operand_count - 1].reg.alignment = alignment; + } instruction_dst_param_init_ssa_scalar(ins, sm6); } @@ -5628,11 +5996,11 @@ static void sm6_parser_emit_ret(struct sm6_parser *sm6, const struct dxil_record static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_record *record, struct vkd3d_shader_instruction *ins, struct sm6_value *dst) { - struct vkd3d_shader_src_param *src_param; + unsigned int i = 0, alignment, operand_count; + struct vkd3d_shader_src_param *src_params; struct vkd3d_shader_dst_param *dst_param; const struct sm6_type *pointee_type; const struct sm6_value *ptr, *src; - unsigned int i = 0, alignment; uint64_t alignment_code; if (!(ptr = sm6_parser_get_value_by_ref(sm6, record, NULL, &i)) @@ -5665,16 +6033,40 @@ static void sm6_parser_emit_store(struct sm6_parser *sm6, const struct dxil_reco if (record->operands[i]) WARN("Ignoring volatile modifier.\n"); - vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_MOV); + if (ptr->structure_stride) + { + assert(ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + 
vsir_instruction_init(ins, &sm6->p.location, VKD3DSIH_STORE_STRUCTURED); - if (!(src_param = instruction_src_params_alloc(ins, 1, sm6))) - return; - src_param_init_from_value(&src_param[0], src); + if (!(src_params = instruction_src_params_alloc(ins, 3, sm6))) + return; + if (ptr->u.reg.idx[1].rel_addr) + src_params[0] = *ptr->u.reg.idx[1].rel_addr; + else + src_param_make_constant_uint(&src_params[0], ptr->u.reg.idx[1].offset); + /* Struct offset is always zero as there is no struct, just an array. */ + src_param_make_constant_uint(&src_params[1], 0); + src_param_init_from_value(&src_params[2], src); + } + else + { + operand_count = 1 + (ptr->u.reg.type == VKD3DSPR_GROUPSHAREDMEM); + vsir_instruction_init(ins, &sm6->p.location, (operand_count > 1) ? VKD3DSIH_STORE_RAW : VKD3DSIH_MOV); + + if (!(src_params = instruction_src_params_alloc(ins, operand_count, sm6))) + return; + if (operand_count > 1) + src_param_make_constant_uint(&src_params[0], 0); + src_param_init_from_value(&src_params[operand_count - 1], src); + } dst_param = instruction_dst_params_alloc(ins, 1, sm6); dst_param_init(dst_param); dst_param->reg = ptr->u.reg; dst_param->reg.alignment = alignment; + /* Groupshared stores contain the address in the src params. */ + if (dst_param->reg.type != VKD3DSPR_IDXTEMP) + dst_param->reg.idx_count = 1; } static void sm6_parser_emit_switch(struct sm6_parser *sm6, const struct dxil_record *record, @@ -7752,19 +8144,19 @@ static enum vkd3d_result sm6_parser_signatures_init(struct sm6_parser *sm6, cons } if (m->u.node->operand_count && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[0], - &sm6->p.shader_desc.input_signature)) < 0) + &sm6->p.program.input_signature)) < 0) { return ret; } if (m->u.node->operand_count > 1 && (ret = sm6_parser_read_signature(sm6, m->u.node->operands[1], - &sm6->p.shader_desc.output_signature)) < 0) + &sm6->p.program.output_signature)) < 0) { return ret; } /* TODO: patch constant signature in operand 2. 
*/ - sm6_parser_init_input_signature(sm6, &sm6->p.shader_desc.input_signature); - sm6_parser_init_output_signature(sm6, &sm6->p.shader_desc.output_signature); + sm6_parser_init_input_signature(sm6, &sm6->p.program.input_signature); + sm6_parser_init_output_signature(sm6, &sm6->p.program.output_signature); return VKD3D_OK; } @@ -8062,7 +8454,6 @@ static void sm6_parser_destroy(struct vkd3d_shader_parser *parser) sm6_parser_metadata_cleanup(sm6); vkd3d_free(sm6->descriptors); vkd3d_free(sm6->values); - free_shader_desc(&parser->shader_desc); vkd3d_free(sm6); } @@ -8080,15 +8471,16 @@ static struct sm6_function *sm6_parser_get_function(const struct sm6_parser *sm6 return NULL; } -static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t *byte_code, size_t byte_code_size, - const char *source_name, struct vkd3d_shader_message_context *message_context) +static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const char *source_name, + struct vkd3d_shader_message_context *message_context, struct dxbc_shader_desc *dxbc_desc) { - const struct shader_signature *output_signature = &sm6->p.shader_desc.output_signature; - const struct shader_signature *input_signature = &sm6->p.shader_desc.input_signature; + const struct shader_signature *output_signature = &sm6->p.program.output_signature; + const struct shader_signature *input_signature = &sm6->p.program.input_signature; + size_t count, length, function_count, byte_code_size = dxbc_desc->byte_code_size; const struct vkd3d_shader_location location = {.source_name = source_name}; uint32_t version_token, dxil_version, token_count, magic; + const uint32_t *byte_code = dxbc_desc->byte_code; unsigned int chunk_offset, chunk_size; - size_t count, length, function_count; enum bitcode_block_abbreviation abbr; struct vkd3d_shader_version version; struct dxil_block *block; @@ -8181,6 +8573,11 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t sm6->ptr = &sm6->start[1]; 
sm6->bitpos = 2; + sm6->p.program.input_signature = dxbc_desc->input_signature; + sm6->p.program.output_signature = dxbc_desc->output_signature; + sm6->p.program.patch_constant_signature = dxbc_desc->patch_constant_signature; + memset(dxbc_desc, 0, sizeof(*dxbc_desc)); + block = &sm6->root_block; if ((ret = dxil_block_init(block, NULL, sm6)) < 0) { @@ -8351,7 +8748,7 @@ static enum vkd3d_result sm6_parser_init(struct sm6_parser *sm6, const uint32_t int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) { - struct vkd3d_shader_desc *shader_desc; + struct dxbc_shader_desc dxbc_desc = {0}; uint32_t *byte_code = NULL; struct sm6_parser *sm6; int ret; @@ -8364,35 +8761,37 @@ int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compi return VKD3D_ERROR_OUT_OF_MEMORY; } - shader_desc = &sm6->p.shader_desc; - shader_desc->is_dxil = true; + dxbc_desc.is_dxil = true; if ((ret = shader_extract_from_dxbc(&compile_info->source, message_context, compile_info->source_name, - shader_desc)) < 0) + &dxbc_desc)) < 0) { WARN("Failed to extract shader, vkd3d result %d.\n", ret); vkd3d_free(sm6); return ret; } - sm6->p.shader_desc = *shader_desc; - shader_desc = &sm6->p.shader_desc; - - if (((uintptr_t)shader_desc->byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) + if (((uintptr_t)dxbc_desc.byte_code & (VKD3D_DXBC_CHUNK_ALIGNMENT - 1))) { /* LLVM bitcode should be 32-bit aligned, but before dxc v1.7.2207 this was not always the case in the DXBC * container due to missing padding after signature names. Get an aligned copy to prevent unaligned access. */ - if (!(byte_code = vkd3d_malloc(align(shader_desc->byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) - ERR("Failed to allocate aligned chunk. 
Unaligned access will occur.\n"); - else - memcpy(byte_code, shader_desc->byte_code, shader_desc->byte_code_size); + if (!(byte_code = vkd3d_malloc(align(dxbc_desc.byte_code_size, VKD3D_DXBC_CHUNK_ALIGNMENT)))) + { + ERR("Failed to allocate aligned chunk.\n"); + free_dxbc_shader_desc(&dxbc_desc); + vkd3d_free(sm6); + return VKD3D_ERROR_OUT_OF_MEMORY; + } + + memcpy(byte_code, dxbc_desc.byte_code, dxbc_desc.byte_code_size); + dxbc_desc.byte_code = byte_code; } - ret = sm6_parser_init(sm6, byte_code ? byte_code : shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, message_context); + ret = sm6_parser_init(sm6, compile_info->source_name, message_context, &dxbc_desc); + free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(byte_code); if (!sm6->p.failed && ret >= 0) - ret = vsir_validate(&sm6->p); + ret = vkd3d_shader_parser_validate(&sm6->p); if (sm6->p.failed && ret >= 0) ret = VKD3D_ERROR_INVALID_SHADER; diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c index bc70d5220fd..f2be00da33a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/fx.c +++ b/libs/vkd3d/libs/vkd3d-shader/fx.c @@ -61,9 +61,9 @@ struct fx_write_context; struct fx_write_context_ops { uint32_t (*write_string)(const char *string, struct fx_write_context *fx); - uint32_t (*write_type)(const struct hlsl_type *type, struct fx_write_context *fx); void (*write_technique)(struct hlsl_ir_var *var, struct fx_write_context *fx); void (*write_pass)(struct hlsl_ir_var *var, struct fx_write_context *fx); + bool are_child_effects_supported; }; struct fx_write_context @@ -84,8 +84,13 @@ struct fx_write_context uint32_t buffer_count; uint32_t numeric_variable_count; uint32_t object_variable_count; + uint32_t shared_object_count; + uint32_t shader_variable_count; + uint32_t parameter_count; int status; + bool child_effect; + const struct fx_write_context_ops *ops; }; @@ -97,6 +102,11 @@ static void set_status(struct fx_write_context *fx, int status) fx->status = status; } 
+static bool has_annotations(const struct hlsl_ir_var *var) +{ + return var->annotations && !list_empty(&var->annotations->vars); +} + static uint32_t write_string(const char *string, struct fx_write_context *fx) { return fx->ops->write_string(string, fx); @@ -107,12 +117,16 @@ static void write_pass(struct hlsl_ir_var *var, struct fx_write_context *fx) fx->ops->write_pass(var, fx); } +static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx); + static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context *fx) { struct type_entry *type_entry; unsigned int elements_count; const char *name; + assert(fx->ctx->profile->major_version >= 4); + if (type->class == HLSL_CLASS_ARRAY) { name = hlsl_get_multiarray_element_type(type)->name; @@ -138,7 +152,7 @@ static uint32_t write_type(const struct hlsl_type *type, struct fx_write_context if (!(type_entry = hlsl_alloc(fx->ctx, sizeof(*type_entry)))) return 0; - type_entry->offset = fx->ops->write_type(type, fx); + type_entry->offset = write_fx_4_type(type, fx); type_entry->name = name; type_entry->elements_count = elements_count; @@ -151,6 +165,7 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co struct fx_write_context *fx) { unsigned int version = ctx->profile->major_version; + struct hlsl_block block; memset(fx, 0, sizeof(*fx)); @@ -174,12 +189,18 @@ static void fx_write_context_init(struct hlsl_ctx *ctx, const struct fx_write_co rb_init(&fx->strings, string_storage_compare); list_init(&fx->types); + + fx->child_effect = fx->ops->are_child_effects_supported && ctx->child_effect; + + hlsl_block_init(&block); + hlsl_prepend_global_uniform_copy(fx->ctx, &block); + hlsl_block_cleanup(&block); + hlsl_calculate_buffer_offsets(fx->ctx); } static int fx_write_context_cleanup(struct fx_write_context *fx) { struct type_entry *type, *next_type; - int status = fx->status; rb_destroy(&fx->strings, string_storage_destroy, NULL); @@ -189,7 +210,7 @@ 
static int fx_write_context_cleanup(struct fx_write_context *fx) vkd3d_free(type); } - return status; + return fx->ctx->result; } static bool technique_matches_version(const struct hlsl_ir_var *var, const struct fx_write_context *fx) @@ -285,6 +306,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, [HLSL_TYPE_UINT ] = 3, [HLSL_TYPE_BOOL ] = 4, }; + struct hlsl_ctx *ctx = fx->ctx; uint32_t value = 0; switch (type->class) @@ -295,8 +317,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, value |= numeric_type_class[type->class]; break; default: - FIXME("Unexpected type class %u.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + hlsl_fixme(ctx, &ctx->location, "Not implemented for type class %u.", type->class); return 0; } @@ -309,8 +330,7 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, value |= (numeric_base_type[type->base_type] << NUMERIC_BASE_TYPE_SHIFT); break; default: - FIXME("Unexpected base type %u.\n", type->base_type); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + hlsl_fixme(ctx, &ctx->location, "Not implemented for base type %u.", type->base_type); return 0; } @@ -322,19 +342,14 @@ static uint32_t get_fx_4_numeric_type_description(const struct hlsl_type *type, return value; } -static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) +static const char * get_fx_4_type_name(const struct hlsl_type *type) { - struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; - uint32_t name_offset, offset, size, stride, numeric_desc; - uint32_t elements_count = 0; - const char *name; - static const uint32_t variable_type[] = + static const char * const object_type_names[] = { - [HLSL_CLASS_SCALAR] = 1, - [HLSL_CLASS_VECTOR] = 1, - [HLSL_CLASS_MATRIX] = 1, - [HLSL_CLASS_OBJECT] = 2, - [HLSL_CLASS_STRUCT] = 3, + [HLSL_TYPE_PIXELSHADER] = "PixelShader", + [HLSL_TYPE_VERTEXSHADER] = "VertexShader", + 
[HLSL_TYPE_RENDERTARGETVIEW] = "RenderTargetView", + [HLSL_TYPE_DEPTHSTENCILVIEW] = "DepthStencilView", }; static const char * const texture_type_names[] = { @@ -360,6 +375,40 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co [HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer", }; + if (type->base_type == HLSL_TYPE_TEXTURE) + return texture_type_names[type->sampler_dim]; + + if (type->base_type == HLSL_TYPE_UAV) + return uav_type_names[type->sampler_dim]; + + switch (type->base_type) + { + case HLSL_TYPE_PIXELSHADER: + case HLSL_TYPE_VERTEXSHADER: + case HLSL_TYPE_RENDERTARGETVIEW: + case HLSL_TYPE_DEPTHSTENCILVIEW: + return object_type_names[type->base_type]; + default: + return type->name; + } +} + +static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + uint32_t name_offset, offset, size, stride, numeric_desc; + uint32_t elements_count = 0; + const char *name; + static const uint32_t variable_type[] = + { + [HLSL_CLASS_SCALAR] = 1, + [HLSL_CLASS_VECTOR] = 1, + [HLSL_CLASS_MATRIX] = 1, + [HLSL_CLASS_OBJECT] = 2, + [HLSL_CLASS_STRUCT] = 3, + }; + struct hlsl_ctx *ctx = fx->ctx; + /* Resolve arrays to element type and number of elements. 
*/ if (type->class == HLSL_CLASS_ARRAY) { @@ -367,12 +416,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co type = hlsl_get_multiarray_element_type(type); } - if (type->base_type == HLSL_TYPE_TEXTURE) - name = texture_type_names[type->sampler_dim]; - else if (type->base_type == HLSL_TYPE_UAV) - name = uav_type_names[type->sampler_dim]; - else - name = type->name; + name = get_fx_4_type_name(type); name_offset = write_string(name, fx); offset = put_u32_unaligned(buffer, name_offset); @@ -387,8 +431,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, variable_type[type->class]); break; default: - FIXME("Writing type class %u is not implemented.\n", type->class); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + hlsl_fixme(ctx, &ctx->location, "Writing type class %u is not implemented.", type->class); return 0; } @@ -426,6 +469,8 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co { static const uint32_t object_type[] = { + [HLSL_TYPE_PIXELSHADER] = 5, + [HLSL_TYPE_VERTEXSHADER] = 6, [HLSL_TYPE_RENDERTARGETVIEW] = 19, [HLSL_TYPE_DEPTHSTENCILVIEW] = 20, }; @@ -456,7 +501,9 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co switch (type->base_type) { case HLSL_TYPE_DEPTHSTENCILVIEW: + case HLSL_TYPE_PIXELSHADER: case HLSL_TYPE_RENDERTARGETVIEW: + case HLSL_TYPE_VERTEXSHADER: put_u32_unaligned(buffer, object_type[type->base_type]); break; case HLSL_TYPE_TEXTURE: @@ -466,8 +513,7 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co put_u32_unaligned(buffer, uav_type[type->sampler_dim]); break; default: - FIXME("Object type %u is not supported.\n", type->base_type); - set_status(fx, VKD3D_ERROR_NOT_IMPLEMENTED); + hlsl_fixme(ctx, &ctx->location, "Object type %u is not supported.", type->base_type); return 0; } } @@ -573,6 +619,73 @@ static uint32_t write_fx_2_string(const char *string, 
struct fx_write_context *f return offset; } +static uint32_t write_fx_2_parameter(const struct hlsl_type *type, const char *name, const struct hlsl_semantic *semantic, + struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + uint32_t semantic_offset, offset, elements_count = 0, name_offset; + struct hlsl_ctx *ctx = fx->ctx; + size_t i; + + /* Resolve arrays to element type and number of elements. */ + if (type->class == HLSL_CLASS_ARRAY) + { + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + } + + name_offset = write_string(name, fx); + semantic_offset = write_string(semantic->name, fx); + + switch (type->base_type) + { + case HLSL_TYPE_FLOAT: + case HLSL_TYPE_BOOL: + case HLSL_TYPE_INT: + case HLSL_TYPE_VOID: + break; + default: + hlsl_fixme(ctx, &ctx->location, "Writing parameter type %u is not implemented.", + type->base_type); + return 0; + }; + + offset = put_u32(buffer, hlsl_sm1_base_type(type)); + put_u32(buffer, hlsl_sm1_class(type)); + put_u32(buffer, name_offset); + put_u32(buffer, semantic_offset); + put_u32(buffer, elements_count); + + switch (type->class) + { + case HLSL_CLASS_VECTOR: + put_u32(buffer, type->dimx); + put_u32(buffer, type->dimy); + break; + case HLSL_CLASS_SCALAR: + case HLSL_CLASS_MATRIX: + put_u32(buffer, type->dimy); + put_u32(buffer, type->dimx); + break; + case HLSL_CLASS_STRUCT: + put_u32(buffer, type->e.record.field_count); + break; + default: + ; + } + + if (type->class == HLSL_CLASS_STRUCT) + { + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + write_fx_2_parameter(field->type, field->name, &field->semantic, fx); + } + } + + return offset; +} + static void write_fx_2_technique(struct hlsl_ir_var *var, struct fx_write_context *fx) { struct vkd3d_bytecode_buffer *buffer = &fx->structured; @@ -595,6 +708,88 @@ static void write_fx_2_technique(struct hlsl_ir_var *var, 
struct fx_write_contex set_u32(buffer, count_offset, count); } +static uint32_t get_fx_2_type_size(const struct hlsl_type *type) +{ + uint32_t size = 0, elements_count; + size_t i; + + if (type->class == HLSL_CLASS_ARRAY) + { + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + return get_fx_2_type_size(type) * elements_count; + } + else if (type->class == HLSL_CLASS_STRUCT) + { + for (i = 0; i < type->e.record.field_count; ++i) + { + const struct hlsl_struct_field *field = &type->e.record.fields[i]; + size += get_fx_2_type_size(field->type); + } + + return size; + } + + return type->dimx * type->dimy * sizeof(float); +} + +static uint32_t write_fx_2_initial_value(const struct hlsl_ir_var *var, struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->unstructured; + const struct hlsl_type *type = var->data_type; + uint32_t i, offset, size, elements_count = 1; + + size = get_fx_2_type_size(type); + + if (type->class == HLSL_CLASS_ARRAY) + { + elements_count = hlsl_get_multiarray_size(type); + type = hlsl_get_multiarray_element_type(type); + } + + if (type->class == HLSL_CLASS_OBJECT) + { + /* Objects are given sequential ids. 
*/ + offset = put_u32(buffer, fx->object_variable_count++); + for (i = 1; i < elements_count; ++i) + put_u32(buffer, fx->object_variable_count++); + } + else + { + /* FIXME: write actual initial value */ + offset = put_u32(buffer, 0); + + for (i = 1; i < size / sizeof(uint32_t); ++i) + put_u32(buffer, 0); + } + + return offset; +} + +static void write_fx_2_parameters(struct fx_write_context *fx) +{ + struct vkd3d_bytecode_buffer *buffer = &fx->structured; + uint32_t desc_offset, value_offset; + struct hlsl_ctx *ctx = fx->ctx; + struct hlsl_ir_var *var; + + LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry) + { + desc_offset = write_fx_2_parameter(var->data_type, var->name, &var->semantic, fx); + value_offset = write_fx_2_initial_value(var, fx); + + put_u32(buffer, desc_offset); /* Parameter description */ + put_u32(buffer, value_offset); /* Value */ + put_u32(buffer, 0); /* Flags */ + + put_u32(buffer, 0); /* Annotations count */ + if (has_annotations(var)) + hlsl_fixme(ctx, &ctx->location, "Writing annotations for parameters is not implemented."); + + ++fx->parameter_count; + } +} + static const struct fx_write_context_ops fx_2_ops = { .write_string = write_fx_2_string, @@ -604,9 +799,9 @@ static const struct fx_write_context_ops fx_2_ops = static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) { + uint32_t offset, size, technique_count, parameter_count; struct vkd3d_bytecode_buffer buffer = { 0 }; struct vkd3d_bytecode_buffer *structured; - uint32_t offset, size, technique_count; struct fx_write_context fx; fx_write_context_init(ctx, &fx_2_ops, &fx); @@ -618,12 +813,13 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, 0xfeff0901); /* Version. 
*/ offset = put_u32(&buffer, 0); - put_u32(structured, 0); /* Parameter count */ + parameter_count = put_u32(structured, 0); /* Parameter count */ technique_count = put_u32(structured, 0); put_u32(structured, 0); /* Unknown */ put_u32(structured, 0); /* Object count */ - /* TODO: parameters */ + write_fx_2_parameters(&fx); + set_u32(structured, parameter_count, fx.parameter_count); write_techniques(ctx->globals, &fx); set_u32(structured, technique_count, fx.technique_count); @@ -643,24 +839,27 @@ static int hlsl_fx_2_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) vkd3d_free(fx.unstructured.data); vkd3d_free(fx.structured.data); - if (!fx.status) + if (!fx.technique_count) + hlsl_error(ctx, &ctx->location, VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE, "No techniques found."); + + if (fx.status < 0) + ctx->result = fx.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - if (fx.status < 0) - ctx->result = fx.status; - return fx_write_context_cleanup(&fx); } static const struct fx_write_context_ops fx_4_ops = { .write_string = write_fx_4_string, - .write_type = write_fx_4_type, .write_technique = write_fx_4_technique, .write_pass = write_fx_4_pass, + .are_child_effects_supported = true, }; static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) @@ -672,6 +871,7 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write { HAS_EXPLICIT_BIND_POINT = 0x4, }; + struct hlsl_ctx *ctx = fx->ctx; /* Explicit bind point. 
*/ if (var->reg_reservation.reg_type) @@ -690,14 +890,18 @@ static void write_fx_4_numeric_variable(struct hlsl_ir_var *var, struct fx_write put_u32(buffer, flags); /* Flags */ put_u32(buffer, 0); /* Annotations count */ - /* FIXME: write annotations */ + if (has_annotations(var)) + hlsl_fixme(ctx, &ctx->location, "Writing annotations for numeric variables is not implemented."); } static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_context *fx) { + const struct hlsl_type *type = hlsl_get_multiarray_element_type(var->data_type); + uint32_t elements_count = hlsl_get_multiarray_size(var->data_type); struct vkd3d_bytecode_buffer *buffer = &fx->structured; uint32_t semantic_offset, bind_point = ~0u; - uint32_t name_offset, type_offset; + uint32_t name_offset, type_offset, i; + struct hlsl_ctx *ctx = fx->ctx; if (var->reg_reservation.reg_type) bind_point = var->reg_reservation.reg_index; @@ -712,8 +916,36 @@ static void write_fx_4_object_variable(struct hlsl_ir_var *var, struct fx_write_ semantic_offset = put_u32(buffer, semantic_offset); /* Semantic */ put_u32(buffer, bind_point); /* Explicit bind point */ + if (fx->child_effect && var->storage_modifiers & HLSL_STORAGE_SHARED) + { + ++fx->shared_object_count; + return; + } + + /* Initializer */ + switch (type->base_type) + { + case HLSL_TYPE_TEXTURE: + case HLSL_TYPE_UAV: + case HLSL_TYPE_RENDERTARGETVIEW: + break; + case HLSL_TYPE_PIXELSHADER: + case HLSL_TYPE_VERTEXSHADER: + /* FIXME: write shader blobs, once parser support works. 
*/ + for (i = 0; i < elements_count; ++i) + put_u32(buffer, 0); + ++fx->shader_variable_count; + break; + default: + hlsl_fixme(ctx, &ctx->location, "Writing initializer for object type %u is not implemented.", + type->base_type); + } + put_u32(buffer, 0); /* Annotations count */ - /* FIXME: write annotations */ + if (has_annotations(var)) + hlsl_fixme(ctx, &ctx->location, "Writing annotations for object variables is not implemented."); + + ++fx->object_variable_count; } static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx) @@ -734,7 +966,8 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx bind_point = b->reservation.reg_index; if (b->type == HLSL_BUFFER_TEXTURE) flags |= IS_TBUFFER; - /* FIXME: set 'single' flag for fx_5_0 */ + if (ctx->profile->major_version == 5 && b->modifiers & HLSL_MODIFIER_SINGLE) + flags |= IS_SINGLE; name_offset = write_string(b->name, fx); @@ -768,12 +1001,6 @@ static void write_fx_4_buffer(struct hlsl_buffer *b, struct fx_write_context *fx static void write_buffers(struct fx_write_context *fx) { struct hlsl_buffer *buffer; - struct hlsl_block block; - - hlsl_block_init(&block); - hlsl_prepend_global_uniform_copy(fx->ctx, &block); - hlsl_block_init(&block); - hlsl_calculate_buffer_offsets(fx->ctx); LIST_FOR_EACH_ENTRY(buffer, &fx->ctx->buffers, struct hlsl_buffer, entry) { @@ -806,21 +1033,23 @@ static bool is_object_variable(const struct hlsl_ir_var *var) } } -static void write_objects(struct fx_write_context *fx) +static void write_objects(struct fx_write_context *fx, bool shared) { struct hlsl_ir_var *var; - uint32_t count = 0; + + if (shared && !fx->child_effect) + return; LIST_FOR_EACH_ENTRY(var, &fx->ctx->extern_vars, struct hlsl_ir_var, extern_entry) { if (!is_object_variable(var)) continue; + if (fx->child_effect && (shared != !!(var->storage_modifiers & HLSL_STORAGE_SHARED))) + continue; + write_fx_4_object_variable(var, fx); - ++count; } - - fx->object_variable_count 
+= count; } static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) @@ -834,9 +1063,9 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&fx.unstructured, 0); /* Empty string placeholder. */ write_buffers(&fx); - write_objects(&fx); + write_objects(&fx, false); /* TODO: shared buffers */ - /* TODO: shared objects */ + write_objects(&fx, true); write_techniques(ctx->globals, &fx); @@ -846,7 +1075,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, fx.object_variable_count); /* Object variable count. */ put_u32(&buffer, 0); /* Pool buffer count. */ put_u32(&buffer, 0); /* Pool variable count. */ - put_u32(&buffer, 0); /* Pool object count. */ + put_u32(&buffer, fx.shared_object_count); /* Shared object count. */ put_u32(&buffer, fx.technique_count); size_offset = put_u32(&buffer, 0); /* Unstructured size. */ put_u32(&buffer, 0); /* String count. */ @@ -857,7 +1086,7 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, 0); /* Sampler state count. */ put_u32(&buffer, 0); /* Rendertarget view count. */ put_u32(&buffer, 0); /* Depth stencil view count. */ - put_u32(&buffer, 0); /* Shader count. */ + put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ put_u32(&buffer, 0); /* Inline shader count. */ set_u32(&buffer, size_offset, fx.unstructured.size); @@ -870,15 +1099,15 @@ static int hlsl_fx_4_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) set_status(&fx, buffer.status); - if (!fx.status) + if (fx.status < 0) + ctx->result = fx.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - if (fx.status < 0) - ctx->result = fx.status; - return fx_write_context_cleanup(&fx); } @@ -893,7 +1122,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&fx.unstructured, 0); /* Empty string placeholder. 
*/ write_buffers(&fx); - write_objects(&fx); + write_objects(&fx, false); /* TODO: interface variables */ write_groups(&fx); @@ -915,7 +1144,7 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) put_u32(&buffer, 0); /* Sampler state count. */ put_u32(&buffer, 0); /* Rendertarget view count. */ put_u32(&buffer, 0); /* Depth stencil view count. */ - put_u32(&buffer, 0); /* Shader count. */ + put_u32(&buffer, fx.shader_variable_count); /* Shader count. */ put_u32(&buffer, 0); /* Inline shader count. */ put_u32(&buffer, fx.group_count); /* Group count. */ put_u32(&buffer, 0); /* UAV count. */ @@ -933,15 +1162,15 @@ static int hlsl_fx_5_write(struct hlsl_ctx *ctx, struct vkd3d_shader_code *out) set_status(&fx, buffer.status); - if (!fx.status) + if (fx.status < 0) + ctx->result = fx.status; + + if (!ctx->result) { out->code = buffer.data; out->size = buffer.size; } - if (fx.status < 0) - ctx->result = fx.status; - return fx_write_context_cleanup(&fx); } diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c index bdd03c1e72a..3977852a48d 100644 --- a/libs/vkd3d/libs/vkd3d-shader/glsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c @@ -20,29 +20,14 @@ struct vkd3d_glsl_generator { - struct vkd3d_shader_version version; + struct vsir_program *program; struct vkd3d_string_buffer buffer; struct vkd3d_shader_location location; struct vkd3d_shader_message_context *message_context; + unsigned int indent; bool failed; }; -struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location) -{ - struct vkd3d_glsl_generator *generator; - - if (!(generator = vkd3d_malloc(sizeof(*generator)))) - return NULL; - - memset(generator, 0, sizeof(*generator)); - generator->version = *version; - vkd3d_string_buffer_init(&generator->buffer); - generator->location = *location; - 
generator->message_context = message_context; - return generator; -} - static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( struct vkd3d_glsl_generator *generator, enum vkd3d_shader_error error, const char *fmt, ...) @@ -55,10 +40,23 @@ static void VKD3D_PRINTF_FUNC(3, 4) vkd3d_glsl_compiler_error( generator->failed = true; } +static void shader_glsl_print_indent(struct vkd3d_string_buffer *buffer, unsigned int indent) +{ + vkd3d_string_buffer_printf(buffer, "%*s", 4 * indent, ""); +} + +static void shader_glsl_unhandled(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins) +{ + shader_glsl_print_indent(&gen->buffer, gen->indent); + vkd3d_string_buffer_printf(&gen->buffer, "/* <unhandled instruction %#x> */\n", ins->handler_idx); + vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL, + "Internal compiler error: Unhandled instruction %#x.", ins->handler_idx); +} + static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, const struct vkd3d_shader_instruction *ins) { - const struct vkd3d_shader_version *version = &generator->version; + const struct vkd3d_shader_version *version = &generator->program->shader_version; /* * TODO: Implement in_subroutine @@ -66,6 +64,7 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, */ if (version->major >= 4) { + shader_glsl_print_indent(&generator->buffer, generator->indent); vkd3d_string_buffer_printf(&generator->buffer, "return;\n"); } } @@ -73,6 +72,8 @@ static void shader_glsl_ret(struct vkd3d_glsl_generator *generator, static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator, const struct vkd3d_shader_instruction *instruction) { + generator->location = instruction->location; + switch (instruction->handler_idx) { case VKD3DSIH_DCL_INPUT: @@ -83,34 +84,36 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *generator shader_glsl_ret(generator, instruction); break; default: - vkd3d_glsl_compiler_error(generator, - 
VKD3D_SHADER_ERROR_GLSL_INTERNAL, - "Unhandled instruction %#x", instruction->handler_idx); + shader_glsl_unhandled(generator, instruction); break; } } -int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, - struct vsir_program *program, struct vkd3d_shader_code *out) +static int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, struct vkd3d_shader_code *out) { + const struct vkd3d_shader_instruction_array *instructions = &generator->program->instructions; unsigned int i; void *code; + ERR("Generating a GLSL shader. This is unsupported; you get to keep all the pieces if it breaks.\n"); + vkd3d_string_buffer_printf(&generator->buffer, "#version 440\n\n"); vkd3d_string_buffer_printf(&generator->buffer, "void main()\n{\n"); - generator->location.column = 0; - for (i = 0; i < program->instructions.count; ++i) + ++generator->indent; + for (i = 0; i < instructions->count; ++i) { - generator->location.line = i + 1; - vkd3d_glsl_handle_instruction(generator, &program->instructions.elements[i]); + vkd3d_glsl_handle_instruction(generator, &instructions->elements[i]); } + vkd3d_string_buffer_printf(&generator->buffer, "}\n"); + + if (TRACE_ON()) + vkd3d_string_buffer_trace(&generator->buffer); + if (generator->failed) return VKD3D_ERROR_INVALID_SHADER; - vkd3d_string_buffer_printf(&generator->buffer, "}\n"); - if ((code = vkd3d_malloc(generator->buffer.buffer_size))) { memcpy(code, generator->buffer.buffer, generator->buffer.content_size); @@ -122,8 +125,29 @@ int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, return VKD3D_OK; } -void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator) +static void vkd3d_glsl_generator_cleanup(struct vkd3d_glsl_generator *gen) { - vkd3d_string_buffer_cleanup(&generator->buffer); - vkd3d_free(generator); + vkd3d_string_buffer_cleanup(&gen->buffer); +} + +static void vkd3d_glsl_generator_init(struct vkd3d_glsl_generator *gen, + struct vsir_program *program, struct 
vkd3d_shader_message_context *message_context) +{ + memset(gen, 0, sizeof(*gen)); + gen->program = program; + vkd3d_string_buffer_init(&gen->buffer); + gen->message_context = message_context; +} + +int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context) +{ + struct vkd3d_glsl_generator generator; + int ret; + + vkd3d_glsl_generator_init(&generator, program, message_context); + ret = vkd3d_glsl_generator_generate(&generator, out); + vkd3d_glsl_generator_cleanup(&generator); + + return ret; } diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c index 538f083df9c..a82334e58fd 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c @@ -784,7 +784,9 @@ static const char * get_case_insensitive_typename(const char *name) "dword", "float", "matrix", + "pixelshader", "vector", + "vertexshader", }; unsigned int i; @@ -1346,6 +1348,16 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); } +struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3) +{ + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {arg1, arg2, arg3}; + + assert(hlsl_types_are_equal(arg1->data_type, arg2->data_type)); + assert(hlsl_types_are_equal(arg1->data_type, arg3->data_type)); + return hlsl_new_expr(ctx, op, operands, arg1->data_type, &arg1->loc); +} + struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *condition, struct hlsl_block *then_block, struct hlsl_block *else_block, const struct vkd3d_shader_location *loc) { @@ -2018,7 +2030,7 @@ struct hlsl_ir_function_decl *hlsl_new_func_decl(struct hlsl_ctx *ctx, } struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const 
struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) + uint32_t modifiers, const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc) { struct hlsl_buffer *buffer; @@ -2026,6 +2038,7 @@ struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type return NULL; buffer->type = type; buffer->name = name; + buffer->modifiers = modifiers; if (reservation) buffer->reservation = *reservation; buffer->loc = *loc; @@ -2611,6 +2624,7 @@ const char *debug_hlsl_expr_op(enum hlsl_ir_expr_op op) [HLSL_OP2_MUL] = "*", [HLSL_OP2_NEQUAL] = "!=", [HLSL_OP2_RSHIFT] = ">>", + [HLSL_OP2_SLT] = "slt", [HLSL_OP3_CMP] = "cmp", [HLSL_OP3_DP2ADD] = "dp2add", @@ -3395,8 +3409,8 @@ static void declare_predefined_types(struct hlsl_ctx *ctx) {"pass", HLSL_CLASS_OBJECT, HLSL_TYPE_PASS, 1, 1}, {"STRING", HLSL_CLASS_OBJECT, HLSL_TYPE_STRING, 1, 1}, {"TEXTURE", HLSL_CLASS_OBJECT, HLSL_TYPE_TEXTURE, 1, 1}, - {"PIXELSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, - {"VERTEXSHADER", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, + {"pixelshader", HLSL_CLASS_OBJECT, HLSL_TYPE_PIXELSHADER, 1, 1}, + {"vertexshader", HLSL_CLASS_OBJECT, HLSL_TYPE_VERTEXSHADER, 1, 1}, {"RenderTargetView",HLSL_CLASS_OBJECT, HLSL_TYPE_RENDERTARGETVIEW, 1, 1}, {"DepthStencilView",HLSL_CLASS_OBJECT, HLSL_TYPE_DEPTHSTENCILVIEW, 1, 1}, }; @@ -3571,10 +3585,10 @@ static bool hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil list_init(&ctx->buffers); if (!(ctx->globals_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Globals"), NULL, &ctx->location))) + hlsl_strdup(ctx, "$Globals"), 0, NULL, &ctx->location))) return false; if (!(ctx->params_buffer = hlsl_new_buffer(ctx, HLSL_BUFFER_CONSTANT, - hlsl_strdup(ctx, "$Params"), NULL, &ctx->location))) + hlsl_strdup(ctx, "$Params"), 0, NULL, &ctx->location))) return false; ctx->cur_buffer = ctx->globals_buffer; @@ -3593,6 +3607,10 @@ static bool 
hlsl_ctx_init(struct hlsl_ctx *ctx, const struct vkd3d_shader_compil { ctx->semantic_compat_mapping = option->value & VKD3D_SHADER_COMPILE_OPTION_BACKCOMPAT_MAP_SEMANTIC_NAMES; } + else if (option->name == VKD3D_SHADER_COMPILE_OPTION_CHILD_EFFECT) + { + ctx->child_effect = !!option->value; + } } return true; diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.h b/libs/vkd3d/libs/vkd3d-shader/hlsl.h index df0a53b20de..561782efbf8 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.h +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.h @@ -374,6 +374,7 @@ struct hlsl_attribute #define HLSL_STORAGE_CENTROID 0x00004000 #define HLSL_STORAGE_NOPERSPECTIVE 0x00008000 #define HLSL_STORAGE_LINEAR 0x00010000 +#define HLSL_MODIFIER_SINGLE 0x00020000 #define HLSL_TYPE_MODIFIERS_MASK (HLSL_MODIFIER_PRECISE | HLSL_MODIFIER_VOLATILE | \ HLSL_MODIFIER_CONST | HLSL_MODIFIER_ROW_MAJOR | \ @@ -593,6 +594,8 @@ enum hlsl_ir_expr_op HLSL_OP2_MUL, HLSL_OP2_NEQUAL, HLSL_OP2_RSHIFT, + /* SLT(a, b) retrieves 1.0 if (a < b), else 0.0. Only used for SM1-SM3 target vertex shaders. */ + HLSL_OP2_SLT, /* DP2ADD(a, b, c) computes the scalar product of a.xy and b.xy, * then adds c. */ @@ -798,6 +801,7 @@ struct hlsl_buffer struct vkd3d_shader_location loc; enum hlsl_buffer_type type; const char *name; + uint32_t modifiers; /* Register reserved for this buffer, if any. * If provided, it should be of type 'b' if type is HLSL_BUFFER_CONSTANT and 't' if type is * HLSL_BUFFER_TEXTURE. 
*/ @@ -920,6 +924,7 @@ struct hlsl_ctx uint32_t found_numthreads : 1; bool semantic_compat_mapping; + bool child_effect; }; struct hlsl_resource_load_params @@ -1222,7 +1227,7 @@ struct hlsl_ir_node *hlsl_new_binary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_exp struct hlsl_ir_node *arg2); struct hlsl_ir_node *hlsl_new_bool_constant(struct hlsl_ctx *ctx, bool b, const struct vkd3d_shader_location *loc); struct hlsl_buffer *hlsl_new_buffer(struct hlsl_ctx *ctx, enum hlsl_buffer_type type, const char *name, - const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); + uint32_t modifiers, const struct hlsl_reg_reservation *reservation, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_call(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *decl, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_cast(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_type *type, @@ -1243,6 +1248,8 @@ struct hlsl_ir_node *hlsl_new_if(struct hlsl_ctx *ctx, struct hlsl_ir_node *cond struct hlsl_ir_node *hlsl_new_int_constant(struct hlsl_ctx *ctx, int32_t n, const struct vkd3d_shader_location *loc); struct hlsl_ir_node *hlsl_new_jump(struct hlsl_ctx *ctx, enum hlsl_ir_jump_type type, struct hlsl_ir_node *condition, const struct vkd3d_shader_location *loc); +struct hlsl_ir_node *hlsl_new_ternary_expr(struct hlsl_ctx *ctx, enum hlsl_ir_expr_op op, + struct hlsl_ir_node *arg1, struct hlsl_ir_node *arg2, struct hlsl_ir_node *arg3); void hlsl_init_simple_deref_from_var(struct hlsl_deref *deref, struct hlsl_ir_var *var); @@ -1356,6 +1363,8 @@ bool hlsl_fold_constant_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *inst bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *), struct hlsl_block *block, void *context); +D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type); +D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type); bool 
hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic, bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg); bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx); diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l index 558506db108..600e2cf2c6a 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l @@ -76,6 +76,7 @@ case {return KW_CASE; } cbuffer {return KW_CBUFFER; } centroid {return KW_CENTROID; } column_major {return KW_COLUMN_MAJOR; } +ComputeShader {return KW_COMPUTESHADER; } compile {return KW_COMPILE; } const {return KW_CONST; } continue {return KW_CONTINUE; } @@ -83,6 +84,7 @@ DepthStencilState {return KW_DEPTHSTENCILSTATE; } DepthStencilView {return KW_DEPTHSTENCILVIEW; } default {return KW_DEFAULT; } discard {return KW_DISCARD; } +DomainShader {return KW_DOMAINSHADER; } do {return KW_DO; } double {return KW_DOUBLE; } else {return KW_ELSE; } @@ -92,6 +94,7 @@ for {return KW_FOR; } fxgroup {return KW_FXGROUP; } GeometryShader {return KW_GEOMETRYSHADER; } groupshared {return KW_GROUPSHARED; } +HullShader {return KW_HULLSHADER; } if {return KW_IF; } in {return KW_IN; } inline {return KW_INLINE; } @@ -105,7 +108,7 @@ out {return KW_OUT; } packoffset {return KW_PACKOFFSET; } pass {return KW_PASS; } PixelShader {return KW_PIXELSHADER; } -precise {return KW_PRECISE; } +pixelshader {return KW_PIXELSHADER; } RasterizerOrderedBuffer {return KW_RASTERIZERORDEREDBUFFER; } RasterizerOrderedStructuredBuffer {return KW_RASTERIZERORDEREDSTRUCTUREDBUFFER; } RasterizerOrderedTexture1D {return KW_RASTERIZERORDEREDTEXTURE1D; } @@ -163,6 +166,7 @@ typedef {return KW_TYPEDEF; } uniform {return KW_UNIFORM; } vector {return KW_VECTOR; } VertexShader {return KW_VERTEXSHADER; } +vertexshader {return KW_VERTEXSHADER; } void {return KW_VOID; } volatile {return KW_VOLATILE; } while {return KW_WHILE; } 
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y index cd05fd008a6..ec8b3d22af2 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y @@ -3155,6 +3155,94 @@ static bool intrinsic_ddy_fine(struct hlsl_ctx *ctx, return !!add_unary_arithmetic_expr(ctx, params->instrs, HLSL_OP1_DSY_FINE, arg, loc); } +static bool intrinsic_determinant(struct hlsl_ctx *ctx, + const struct parse_initializer *params, const struct vkd3d_shader_location *loc) +{ + static const char determinant2x2[] = + "%s determinant(%s2x2 m)\n" + "{\n" + " return m._11 * m._22 - m._12 * m._21;\n" + "}"; + static const char determinant3x3[] = + "%s determinant(%s3x3 m)\n" + "{\n" + " %s2x2 m1 = { m._22, m._23, m._32, m._33 };\n" + " %s2x2 m2 = { m._21, m._23, m._31, m._33 };\n" + " %s2x2 m3 = { m._21, m._22, m._31, m._32 };\n" + " %s3 v1 = { m._11, -m._12, m._13 };\n" + " %s3 v2 = { determinant(m1), determinant(m2), determinant(m3) };\n" + " return dot(v1, v2);\n" + "}"; + static const char determinant4x4[] = + "%s determinant(%s4x4 m)\n" + "{\n" + " %s3x3 m1 = { m._22, m._23, m._24, m._32, m._33, m._34, m._42, m._43, m._44 };\n" + " %s3x3 m2 = { m._21, m._23, m._24, m._31, m._33, m._34, m._41, m._43, m._44 };\n" + " %s3x3 m3 = { m._21, m._22, m._24, m._31, m._32, m._34, m._41, m._42, m._44 };\n" + " %s3x3 m4 = { m._21, m._22, m._23, m._31, m._32, m._33, m._41, m._42, m._43 };\n" + " %s4 v1 = { m._11, -m._12, m._13, -m._14 };\n" + " %s4 v2 = { determinant(m1), determinant(m2), determinant(m3), determinant(m4) };\n" + " return dot(v1, v2);\n" + "}"; + static const char *templates[] = + { + [2] = determinant2x2, + [3] = determinant3x3, + [4] = determinant4x4, + }; + + struct hlsl_ir_node *arg = params->args[0]; + const struct hlsl_type *type = arg->data_type; + struct hlsl_ir_function_decl *func; + const char *typename, *template; + unsigned int dim; + char *body; + + if (type->class != HLSL_CLASS_SCALAR && type->class != 
HLSL_CLASS_MATRIX) + { + hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Invalid argument type."); + return false; + } + + dim = min(type->dimx, type->dimy); + if (dim == 1) + { + if (!(arg = intrinsic_float_convert_arg(ctx, params, arg, loc))) + return false; + return hlsl_add_load_component(ctx, params->instrs, arg, 0, loc); + } + + typename = type->base_type == HLSL_TYPE_HALF ? "half" : "float"; + template = templates[dim]; + + switch (dim) + { + case 2: + body = hlsl_sprintf_alloc(ctx, template, typename, typename); + break; + case 3: + body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, + typename, typename, typename, typename); + break; + case 4: + body = hlsl_sprintf_alloc(ctx, template, typename, typename, typename, + typename, typename, typename, typename, typename); + break; + default: + vkd3d_unreachable(); + } + + if (!body) + return false; + + func = hlsl_compile_internal_function(ctx, "determinant", body); + vkd3d_free(body); + if (!func) + return false; + + return add_user_call(ctx, func, params, loc); +} + static bool intrinsic_distance(struct hlsl_ctx *ctx, const struct parse_initializer *params, const struct vkd3d_shader_location *loc) { @@ -4138,6 +4226,7 @@ intrinsic_functions[] = {"ddy_coarse", 1, true, intrinsic_ddy_coarse}, {"ddy_fine", 1, true, intrinsic_ddy_fine}, {"degrees", 1, true, intrinsic_degrees}, + {"determinant", 1, true, intrinsic_determinant}, {"distance", 2, true, intrinsic_distance}, {"dot", 2, true, intrinsic_dot}, {"exp", 1, true, intrinsic_exp}, @@ -5243,6 +5332,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_CENTROID %token KW_COLUMN_MAJOR %token KW_COMPILE +%token KW_COMPUTESHADER %token KW_CONST %token KW_CONTINUE %token KW_DEFAULT @@ -5250,6 +5340,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_DEPTHSTENCILVIEW %token KW_DISCARD %token KW_DO +%token KW_DOMAINSHADER %token KW_DOUBLE %token KW_ELSE 
%token KW_EXTERN @@ -5258,6 +5349,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_FXGROUP %token KW_GEOMETRYSHADER %token KW_GROUPSHARED +%token KW_HULLSHADER %token KW_IF %token KW_IN %token KW_INLINE @@ -5271,7 +5363,6 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim, %token KW_PACKOFFSET %token KW_PASS %token KW_PIXELSHADER -%token KW_PRECISE %token KW_RASTERIZERORDEREDBUFFER %token KW_RASTERIZERORDEREDSTRUCTUREDBUFFER %token KW_RASTERIZERORDEREDTEXTURE1D @@ -5535,10 +5626,6 @@ technique10: struct hlsl_scope *scope = ctx->cur_scope; hlsl_pop_scope(ctx); - if (ctx->profile->type == VKD3D_SHADER_TYPE_EFFECT && ctx->profile->major_version == 2) - hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX, - "The 'technique10' keyword is invalid for this profile."); - if (!add_technique(ctx, $2, scope, $3, "technique10", &@1)) YYABORT; } @@ -5580,12 +5667,12 @@ effect_group: } buffer_declaration: - buffer_type any_identifier colon_attribute + var_modifiers buffer_type any_identifier colon_attribute { - if ($3.semantic.name) - hlsl_error(ctx, &@3, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); + if ($4.semantic.name) + hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers."); - if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $1, $2, &$3.reg_reservation, &@2))) + if (!(ctx->cur_buffer = hlsl_new_buffer(ctx, $2, $3, $1, &$4.reg_reservation, &@3))) YYABORT; } @@ -6454,6 +6541,14 @@ type_no_void: { $$ = hlsl_get_type(ctx->cur_scope, "DepthStencilView", true, true); } + | KW_VERTEXSHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "VertexShader", true, true); + } + | KW_PIXELSHADER + { + $$ = hlsl_get_type(ctx->cur_scope, "PixelShader", true, true); + } type: type_no_void @@ -6727,10 +6822,6 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_NOPERSPECTIVE, &@1); } - | KW_PRECISE var_modifiers - { - $$ = 
add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); - } | KW_SHARED var_modifiers { $$ = add_modifiers(ctx, $2, HLSL_STORAGE_SHARED, &@1); @@ -6779,7 +6870,16 @@ var_modifiers: { $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_INLINE, &@1); } - + | var_identifier var_modifiers + { + if (!strcmp($1, "precise")) + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_PRECISE, &@1); + else if (!strcmp($1, "single")) + $$ = add_modifiers(ctx, $2, HLSL_MODIFIER_SINGLE, &@1); + else + hlsl_error(ctx, &@1, VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER, + "Unknown modifier %s.", debugstr_a($1)); + } complex_initializer: initializer_expr diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c index 307f86f55b7..ff349ab49ef 100644 --- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c +++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c @@ -2647,10 +2647,11 @@ static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx) return false; } -/* Append a FLOOR before a CAST to int or uint (which is written as a mere MOV). */ +/* Turn CAST to int or uint into FLOOR + REINTERPRET (which is written as a mere MOV). 
*/ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { - struct hlsl_ir_node *arg, *floor, *cast2; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }; + struct hlsl_ir_node *arg, *floor, *res; struct hlsl_ir_expr *expr; if (instr->type != HLSL_IR_EXPR) @@ -2665,17 +2666,15 @@ static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, if (arg->data_type->base_type != HLSL_TYPE_FLOAT && arg->data_type->base_type != HLSL_TYPE_HALF) return false; - /* Check that the argument is not already a FLOOR */ - if (arg->type == HLSL_IR_EXPR && hlsl_ir_expr(arg)->op == HLSL_OP1_FLOOR) - return false; - if (!(floor = hlsl_new_unary_expr(ctx, HLSL_OP1_FLOOR, arg, &instr->loc))) return false; hlsl_block_add_instr(block, floor); - if (!(cast2 = hlsl_new_cast(ctx, floor, instr->data_type, &instr->loc))) + memset(operands, 0, sizeof(operands)); + operands[0] = floor; + if (!(res = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) return false; - hlsl_block_add_instr(block, cast2); + hlsl_block_add_instr(block, res); return true; } @@ -2903,7 +2902,7 @@ static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct return true; } -/* Use 'movc' for the ternary operator. */ +/* Use movc/cmp for the ternary operator. 
*/ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 }, *replacement; @@ -2929,7 +2928,7 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return false; } - if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + if (ctx->profile->major_version < 4) { struct hlsl_ir_node *abs, *neg; @@ -2947,11 +2946,6 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru if (!(replacement = hlsl_new_expr(ctx, HLSL_OP3_CMP, operands, first->data_type, &instr->loc))) return false; } - else if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX) - { - hlsl_fixme(ctx, &instr->loc, "Ternary operator is not implemented for %s profile.", ctx->profile->name); - return false; - } else { if (cond->data_type->base_type == HLSL_TYPE_FLOAT) @@ -2981,6 +2975,261 @@ static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, stru return true; } +static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, + struct hlsl_block *block) +{ + struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res, *ret; + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS]; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + bool negate = false; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS + && expr->op != HLSL_OP2_GEQUAL) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); + + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + 
return false; + hlsl_block_add_instr(block, arg2_cast); + + switch (expr->op) + { + case HLSL_OP2_EQUAL: + case HLSL_OP2_NEQUAL: + { + struct hlsl_ir_node *neg, *sub, *abs, *abs_neg; + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); + + if (ctx->profile->major_version >= 3) + { + if (!(abs = hlsl_new_unary_expr(ctx, HLSL_OP1_ABS, sub, &instr->loc))) + return false; + hlsl_block_add_instr(block, abs); + } + else + { + /* Use MUL as a precarious ABS. */ + if (!(abs = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, sub, sub))) + return false; + hlsl_block_add_instr(block, abs); + } + + if (!(abs_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, abs, &instr->loc))) + return false; + hlsl_block_add_instr(block, abs_neg); + + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, abs_neg, abs))) + return false; + hlsl_block_add_instr(block, slt); + + negate = (expr->op == HLSL_OP2_EQUAL); + break; + } + + case HLSL_OP2_GEQUAL: + case HLSL_OP2_LESS: + { + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, arg1_cast, arg2_cast))) + return false; + hlsl_block_add_instr(block, slt); + + negate = (expr->op == HLSL_OP2_GEQUAL); + break; + } + + default: + vkd3d_unreachable(); + } + + if (negate) + { + struct hlsl_constant_value one_value; + struct hlsl_ir_node *one, *slt_neg; + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(slt_neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) + return false; + hlsl_block_add_instr(block, slt_neg); + + if (!(res = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, slt_neg))) + return false; + hlsl_block_add_instr(block, res); + } + else + { + res = slt; 
+ } + + /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT, + * and casts to BOOL have already been lowered to "!= 0". */ + memset(operands, 0, sizeof(operands)); + operands[0] = res; + if (!(ret = hlsl_new_expr(ctx, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, ret); + + return true; +} + +/* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to + * CMP instructions (only available in pixel shaders). + * Based on the following equivalence: + * SLT(x, y) + * = (x < y) ? 1.0 : 0.0 + * = ((x - y) >= 0) ? 0.0 : 1.0 + * = CMP(x - y, 0.0, 1.0) + */ +static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp; + struct hlsl_constant_value zero_value, one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP2_SLT) + return false; + + arg1 = expr->operands[0].node; + arg2 = expr->operands[1].node; + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + if (!(arg1_cast = hlsl_new_cast(ctx, arg1, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg1_cast); + + if (!(arg2_cast = hlsl_new_cast(ctx, arg2, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg2_cast); + + if (!(neg = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, arg2_cast, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, arg1_cast, neg))) + return false; + hlsl_block_add_instr(block, sub); + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + 
one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one))) + return false; + hlsl_block_add_instr(block, cmp); + + return true; +} + +/* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to + * SLT instructions (only available in vertex shaders). + * Based on the following equivalence: + * CMP(x, y, z) + * = (x >= 0) ? y : z + * = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0) + * = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0)) + */ +static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2, *add; + struct hlsl_constant_value zero_value, one_value; + struct hlsl_type *float_type; + struct hlsl_ir_expr *expr; + unsigned int i; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op != HLSL_OP3_CMP) + return false; + + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + + for (i = 0; i < 3; ++i) + { + args[i] = expr->operands[i].node; + + if (!(args_cast[i] = hlsl_new_cast(ctx, args[i], float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, args_cast[i]); + } + + memset(&zero_value, 0, sizeof(zero_value)); + if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, zero); + + one_value.u[0].f = 1.0; + one_value.u[1].f = 1.0; + one_value.u[2].f = 1.0; + one_value.u[3].f = 1.0; + if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc))) + return false; + hlsl_block_add_instr(block, one); + + if (!(slt = hlsl_new_binary_expr(ctx, HLSL_OP2_SLT, args_cast[0], zero))) + return false; + 
hlsl_block_add_instr(block, slt); + + if (!(mul1 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[2], slt))) + return false; + hlsl_block_add_instr(block, mul1); + + if (!(neg_slt = hlsl_new_unary_expr(ctx, HLSL_OP1_NEG, slt, &instr->loc))) + return false; + hlsl_block_add_instr(block, neg_slt); + + if (!(sub = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, one, neg_slt))) + return false; + hlsl_block_add_instr(block, sub); + + if (!(mul2 = hlsl_new_binary_expr(ctx, HLSL_OP2_MUL, args_cast[1], sub))) + return false; + hlsl_block_add_instr(block, mul2); + + if (!(add = hlsl_new_binary_expr(ctx, HLSL_OP2_ADD, mul1, mul2))) + return false; + hlsl_block_add_instr(block, add); + + return true; +} + static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) { struct hlsl_type *type = instr->data_type, *arg_type; @@ -3308,6 +3557,61 @@ static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr return true; } +static bool lower_nonfloat_exprs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block) +{ + struct hlsl_ir_expr *expr; + + if (instr->type != HLSL_IR_EXPR) + return false; + expr = hlsl_ir_expr(instr); + if (expr->op == HLSL_OP1_CAST || instr->data_type->base_type == HLSL_TYPE_FLOAT) + return false; + + switch (expr->op) + { + case HLSL_OP1_ABS: + case HLSL_OP1_NEG: + case HLSL_OP2_ADD: + case HLSL_OP2_DIV: + case HLSL_OP2_MAX: + case HLSL_OP2_MIN: + case HLSL_OP2_MUL: + { + struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0}; + struct hlsl_ir_node *arg, *arg_cast, *float_expr, *ret; + struct hlsl_type *float_type; + unsigned int i; + + for (i = 0; i < HLSL_MAX_OPERANDS; ++i) + { + arg = expr->operands[i].node; + if (!arg) + continue; + + float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->dimx); + if (!(arg_cast = hlsl_new_cast(ctx, arg, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, arg_cast); + + operands[i] = arg_cast; + } + + 
float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->dimx); + if (!(float_expr = hlsl_new_expr(ctx, expr->op, operands, float_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, float_expr); + + if (!(ret = hlsl_new_cast(ctx, float_expr, instr->data_type, &instr->loc))) + return false; + hlsl_block_add_instr(block, ret); + + return true; + } + default: + return false; + } +} + static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context) { struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load; @@ -5087,6 +5391,13 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry remove_unreachable_code(ctx, body); hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL); + if (profile-> major_version < 4) + { + lower_ir(ctx, lower_nonfloat_exprs, body); + /* Constants casted to float must be folded. */ + hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL); + } + lower_ir(ctx, lower_nonconstant_vector_derefs, body); lower_ir(ctx, lower_casts_to_bool, body); lower_ir(ctx, lower_int_dot, body); @@ -5108,6 +5419,11 @@ int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry lower_ir(ctx, lower_round, body); lower_ir(ctx, lower_ceil, body); lower_ir(ctx, lower_floor, body); + lower_ir(ctx, lower_comparison_operators, body); + if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL) + lower_ir(ctx, lower_slt, body); + else + lower_ir(ctx, lower_cmp, body); } if (profile->major_version < 2) diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c index f0bd85338c6..4f0226187af 100644 --- a/libs/vkd3d/libs/vkd3d-shader/ir.c +++ b/libs/vkd3d/libs/vkd3d-shader/ir.c @@ -32,6 +32,9 @@ void vsir_program_cleanup(struct vsir_program *program) vkd3d_free((void *)program->block_names[i]); vkd3d_free(program->block_names); shader_instruction_array_destroy(&program->instructions); + shader_signature_cleanup(&program->input_signature); + 
shader_signature_cleanup(&program->output_signature); + shader_signature_cleanup(&program->patch_constant_signature); } static inline bool shader_register_is_phase_instance_id(const struct vkd3d_shader_register *reg) @@ -91,9 +94,8 @@ static bool vsir_instruction_init_with_params(struct vsir_program *program, return true; } -static enum vkd3d_result instruction_array_lower_texkills(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_lower_texkills(struct vsir_program *program) { - struct vsir_program *program = &parser->program; struct vkd3d_shader_instruction_array *instructions = &program->instructions; struct vkd3d_shader_instruction *texkill_ins, *ins; unsigned int components_read = 3 + (program->shader_version.major >= 2); @@ -227,10 +229,11 @@ static const struct vkd3d_shader_varying_map *find_varying_map( return NULL; } -static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info) +static enum vkd3d_result vsir_program_remap_output_signature(struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) { - struct shader_signature *signature = &parser->shader_desc.output_signature; + const struct vkd3d_shader_location location = {.source_name = compile_info->source_name}; + struct shader_signature *signature = &program->output_signature; const struct vkd3d_shader_varying_map_info *varying_map; unsigned int i; @@ -252,7 +255,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars * location with a different mask. 
*/ if (input_mask && input_mask != e->mask) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " "Output mask %#x does not match input mask %#x.", e->mask, input_mask); @@ -269,7 +272,7 @@ static enum vkd3d_result remap_output_signature(struct vkd3d_shader_parser *pars { if (varying_map->varying_map[i].output_signature_index >= signature->element_count) { - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " "The next stage consumes varyings not written by this stage."); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -453,7 +456,7 @@ void vsir_dst_param_init(struct vkd3d_shader_dst_param *param, enum vkd3d_shader void vsir_src_param_init_label(struct vkd3d_shader_src_param *param, unsigned int label_id) { - vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UINT, 1); + vsir_src_param_init(param, VKD3DSPR_LABEL, VKD3D_DATA_UNUSED, 1); param->reg.dimension = VSIR_DIMENSION_NONE; param->reg.idx[0].offset = label_id; } @@ -464,12 +467,24 @@ static void src_param_init_ssa_bool(struct vkd3d_shader_src_param *src, unsigned src->reg.idx[0].offset = idx; } +static void src_param_init_temp_bool(struct vkd3d_shader_src_param *src, unsigned int idx) +{ + vsir_src_param_init(src, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + src->reg.idx[0].offset = idx; +} + static void dst_param_init_ssa_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_SSA, VKD3D_DATA_BOOL, 1); dst->reg.idx[0].offset = idx; } +static void dst_param_init_temp_bool(struct vkd3d_shader_dst_param *dst, unsigned int idx) +{ + vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_BOOL, 1); + dst->reg.idx[0].offset = idx; +} + static void 
dst_param_init_temp_uint(struct vkd3d_shader_dst_param *dst, unsigned int idx) { vsir_dst_param_init(dst, VKD3DSPR_TEMP, VKD3D_DATA_UINT, 1); @@ -1383,10 +1398,9 @@ static void shader_instruction_normalise_io_params(struct vkd3d_shader_instructi } } -static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_normalise_io_registers(struct vsir_program *program) { - struct io_normaliser normaliser = {parser->program.instructions}; - struct vsir_program *program = &parser->program; + struct io_normaliser normaliser = {program->instructions}; struct vkd3d_shader_instruction *ins; bool has_control_point_phase; unsigned int i, j; @@ -1394,9 +1408,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse normaliser.phase = VKD3DSIH_INVALID; normaliser.shader_type = program->shader_version.type; normaliser.major = program->shader_version.major; - normaliser.input_signature = &parser->shader_desc.input_signature; - normaliser.output_signature = &parser->shader_desc.output_signature; - normaliser.patch_constant_signature = &parser->shader_desc.patch_constant_signature; + normaliser.input_signature = &program->input_signature; + normaliser.output_signature = &program->output_signature; + normaliser.patch_constant_signature = &program->patch_constant_signature; for (i = 0, has_control_point_phase = false; i < program->instructions.count; ++i) { @@ -1439,9 +1453,9 @@ static enum vkd3d_result shader_normalise_io_registers(struct vkd3d_shader_parse } } - if (!shader_signature_merge(&parser->shader_desc.input_signature, normaliser.input_range_map, false) - || !shader_signature_merge(&parser->shader_desc.output_signature, normaliser.output_range_map, false) - || !shader_signature_merge(&parser->shader_desc.patch_constant_signature, normaliser.pc_range_map, true)) + if (!shader_signature_merge(&program->input_signature, normaliser.input_range_map, false) + || 
!shader_signature_merge(&program->output_signature, normaliser.output_range_map, false) + || !shader_signature_merge(&program->patch_constant_signature, normaliser.pc_range_map, true)) { program->instructions = normaliser.instructions; return VKD3D_ERROR_OUT_OF_MEMORY; @@ -1668,19 +1682,20 @@ static void remove_dead_code(struct vsir_program *program) } } -static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_normalise_combined_samplers(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) { unsigned int i; - for (i = 0; i < parser->program.instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; struct vkd3d_shader_src_param *srcs; switch (ins->handler_idx) { case VKD3DSIH_TEX: - if (!(srcs = shader_src_param_allocator_get(&parser->program.instructions.src_params, 3))) + if (!(srcs = shader_src_param_allocator_get(&program->instructions.src_params, 3))) return VKD3D_ERROR_OUT_OF_MEMORY; memset(srcs, 0, sizeof(*srcs) * 3); @@ -1723,7 +1738,7 @@ static enum vkd3d_result normalise_combined_samplers(struct vkd3d_shader_parser case VKD3DSIH_TEXREG2AR: case VKD3DSIH_TEXREG2GB: case VKD3DSIH_TEXREG2RGB: - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &ins->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: " "Combined sampler instruction %#x.", ins->handler_idx); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -1789,10 +1804,10 @@ struct cf_flattener_info struct cf_flattener { - struct vkd3d_shader_parser *parser; + struct vsir_program *program; struct vkd3d_shader_location location; - bool allocation_failed; + enum vkd3d_result status; struct vkd3d_shader_instruction *instructions; 
size_t instruction_capacity; @@ -1812,13 +1827,20 @@ struct cf_flattener size_t control_flow_info_size; }; +static void cf_flattener_set_error(struct cf_flattener *flattener, enum vkd3d_result error) +{ + if (flattener->status != VKD3D_OK) + return; + flattener->status = error; +} + static struct vkd3d_shader_instruction *cf_flattener_require_space(struct cf_flattener *flattener, size_t count) { if (!vkd3d_array_reserve((void **)&flattener->instructions, &flattener->instruction_capacity, flattener->instruction_count + count, sizeof(*flattener->instructions))) { ERR("Failed to allocate instructions.\n"); - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); return NULL; } return &flattener->instructions[flattener->instruction_count]; @@ -1850,9 +1872,9 @@ static struct vkd3d_shader_src_param *instruction_src_params_alloc(struct vkd3d_ { struct vkd3d_shader_src_param *params; - if (!(params = vsir_program_get_src_params(&flattener->parser->program, count))) + if (!(params = vsir_program_get_src_params(flattener->program, count))) { - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); return NULL; } ins->src = params; @@ -1866,10 +1888,10 @@ static void cf_flattener_emit_label(struct cf_flattener *flattener, unsigned int if (!(ins = cf_flattener_require_space(flattener, 1))) return; - if (vsir_instruction_init_label(ins, &flattener->location, label_id, &flattener->parser->program)) + if (vsir_instruction_init_label(ins, &flattener->location, label_id, flattener->program)) ++flattener->instruction_count; else - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); } /* For conditional branches, this returns the false target branch parameter. 
*/ @@ -1947,7 +1969,7 @@ static struct cf_flattener_info *cf_flattener_push_control_flow_level(struct cf_ flattener->control_flow_depth + 1, sizeof(*flattener->control_flow_info))) { ERR("Failed to allocate control flow info structure.\n"); - flattener->allocation_failed = true; + cf_flattener_set_error(flattener, VKD3D_ERROR_OUT_OF_MEMORY); return NULL; } @@ -2014,12 +2036,12 @@ static void VKD3D_PRINTF_FUNC(3, 4) cf_flattener_create_block_name(struct cf_fla flattener->block_names[block_id] = buffer.buffer; } -static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener) +static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flattener *flattener, + struct vkd3d_shader_message_context *message_context) { bool main_block_open, is_hull_shader, after_declarations_section; - struct vkd3d_shader_parser *parser = flattener->parser; struct vkd3d_shader_instruction_array *instructions; - struct vsir_program *program = &parser->program; + struct vsir_program *program = flattener->program; struct vkd3d_shader_instruction *dst_ins; size_t i; @@ -2041,12 +2063,19 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte flattener->location = instruction->location; /* Declarations should occur before the first code block, which in hull shaders is marked by the first - * phase instruction, and in all other shader types begins with the first label instruction. */ - if (!after_declarations_section && !vsir_instruction_is_dcl(instruction) - && instruction->handler_idx != VKD3DSIH_NOP) + * phase instruction, and in all other shader types begins with the first label instruction. + * Declaring an indexable temp with function scope is not considered a declaration, + * because it needs to live inside a function. 
*/ + if (!after_declarations_section && instruction->handler_idx != VKD3DSIH_NOP) { - after_declarations_section = true; - cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + bool is_function_indexable = instruction->handler_idx == VKD3DSIH_DCL_INDEXABLE_TEMP + && instruction->declaration.indexable_temp.has_function_scope; + + if (!vsir_instruction_is_dcl(instruction) || is_function_indexable) + { + after_declarations_section = true; + cf_flattener_emit_label(flattener, cf_flattener_alloc_block_id(flattener)); + } } cf_info = flattener->control_flow_depth @@ -2064,7 +2093,8 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte break; case VKD3DSIH_LABEL: - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + vkd3d_shader_error(message_context, &instruction->location, + VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, "Aborting due to not yet implemented feature: Label instruction."); return VKD3D_ERROR_NOT_IMPLEMENTED; @@ -2229,8 +2259,10 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte if (src->swizzle != VKD3D_SHADER_SWIZZLE(X, X, X, X)) { WARN("Unexpected src swizzle %#x.\n", src->swizzle); - vkd3d_shader_parser_error(parser, VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, + vkd3d_shader_error(message_context, &instruction->location, + VKD3D_SHADER_ERROR_VSIR_INVALID_SWIZZLE, "The swizzle for a switch case value is not scalar X."); + cf_flattener_set_error(flattener, VKD3D_ERROR_INVALID_SHADER); } value = *src->reg.u.immconst_u32; @@ -2358,21 +2390,18 @@ static enum vkd3d_result cf_flattener_iterate_instruction_array(struct cf_flatte ++flattener->instruction_count; } - return flattener->allocation_failed ? 
VKD3D_ERROR_OUT_OF_MEMORY : VKD3D_OK; + return flattener->status; } -static enum vkd3d_result flatten_control_flow_constructs(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_flatten_control_flow_constructs(struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) { - struct vsir_program *program = &parser->program; - struct cf_flattener flattener = {0}; + struct cf_flattener flattener = {.program = program}; enum vkd3d_result result; - flattener.parser = parser; - result = cf_flattener_iterate_instruction_array(&flattener); - - if (result >= 0) + if ((result = cf_flattener_iterate_instruction_array(&flattener, message_context)) >= 0) { - vkd3d_free(parser->program.instructions.elements); + vkd3d_free(program->instructions.elements); program->instructions.elements = flattener.instructions; program->instructions.capacity = flattener.instruction_capacity; program->instructions.count = flattener.instruction_count; @@ -2656,33 +2685,36 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src); +static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, + struct vkd3d_shader_src_param *src); /* This is idempotent: it can be safely applied more than once on the * same register. 
*/ -static void materialize_ssas_to_temps_process_reg(struct vkd3d_shader_parser *parser, struct vkd3d_shader_register *reg) +static void materialize_ssas_to_temps_process_reg(struct vsir_program *program, struct vkd3d_shader_register *reg) { unsigned int i; if (reg->type == VKD3DSPR_SSA) { reg->type = VKD3DSPR_TEMP; - reg->idx[0].offset += parser->program.temp_count; + reg->idx[0].offset += program->temp_count; } for (i = 0; i < reg->idx_count; ++i) if (reg->idx[i].rel_addr) - materialize_ssas_to_temps_process_src_param(parser, reg->idx[i].rel_addr); + materialize_ssas_to_temps_process_src_param(program, reg->idx[i].rel_addr); } -static void materialize_ssas_to_temps_process_dst_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_dst_param *dst) +static void materialize_ssas_to_temps_process_dst_param(struct vsir_program *program, + struct vkd3d_shader_dst_param *dst) { - materialize_ssas_to_temps_process_reg(parser, &dst->reg); + materialize_ssas_to_temps_process_reg(program, &dst->reg); } -static void materialize_ssas_to_temps_process_src_param(struct vkd3d_shader_parser *parser, struct vkd3d_shader_src_param *src) +static void materialize_ssas_to_temps_process_src_param(struct vsir_program *program, + struct vkd3d_shader_src_param *src) { - materialize_ssas_to_temps_process_reg(parser, &src->reg); + materialize_ssas_to_temps_process_reg(program, &src->reg); } static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_source(struct vkd3d_shader_instruction *ins, @@ -2701,7 +2733,7 @@ static const struct vkd3d_shader_src_param *materialize_ssas_to_temps_compute_so vkd3d_unreachable(); } -static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser *parser, +static bool materialize_ssas_to_temps_synthesize_mov(struct vsir_program *program, struct vkd3d_shader_instruction *instruction, const struct vkd3d_shader_location *loc, const struct vkd3d_shader_dst_param *dest, const struct vkd3d_shader_src_param *cond, const 
struct vkd3d_shader_src_param *source, bool invert) @@ -2709,7 +2741,7 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser struct vkd3d_shader_src_param *src; struct vkd3d_shader_dst_param *dst; - if (!vsir_instruction_init_with_params(&parser->program, instruction, loc, + if (!vsir_instruction_init_with_params(program, instruction, loc, cond ? VKD3DSIH_MOVC : VKD3DSIH_MOV, 1, cond ? 3 : 1)) return false; @@ -2717,7 +2749,7 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser src = instruction->src; dst[0] = *dest; - materialize_ssas_to_temps_process_dst_param(parser, &dst[0]); + materialize_ssas_to_temps_process_dst_param(program, &dst[0]); assert(dst[0].write_mask == VKD3DSP_WRITEMASK_0); assert(dst[0].modifiers == 0); @@ -2729,19 +2761,19 @@ static bool materialize_ssas_to_temps_synthesize_mov(struct vkd3d_shader_parser src[1 + invert] = *source; memset(&src[2 - invert], 0, sizeof(src[2 - invert])); src[2 - invert].reg = dst[0].reg; - materialize_ssas_to_temps_process_src_param(parser, &src[1]); - materialize_ssas_to_temps_process_src_param(parser, &src[2]); + materialize_ssas_to_temps_process_src_param(program, &src[1]); + materialize_ssas_to_temps_process_src_param(program, &src[2]); } else { src[0] = *source; - materialize_ssas_to_temps_process_src_param(parser, &src[0]); + materialize_ssas_to_temps_process_src_param(program, &src[0]); } return true; } -static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *parser) +static enum vkd3d_result vsir_program_materialise_ssas_to_temps(struct vsir_program *program) { struct vkd3d_shader_instruction *instructions = NULL; struct materialize_ssas_to_temps_block_data @@ -2752,18 +2784,18 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p size_t ins_capacity = 0, ins_count = 0, i; unsigned int current_label = 0; - if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) 
+ if (!reserve_instructions(&instructions, &ins_capacity, program->instructions.count)) goto fail; - if (!(block_index = vkd3d_calloc(parser->program.block_count, sizeof(*block_index)))) + if (!(block_index = vkd3d_calloc(program->block_count, sizeof(*block_index)))) { ERR("Failed to allocate block index.\n"); goto fail; } - for (i = 0; i < parser->program.instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; switch (ins->handler_idx) { @@ -2785,16 +2817,16 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p } } - for (i = 0; i < parser->program.instructions.count; ++i) + for (i = 0; i < program->instructions.count; ++i) { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; + struct vkd3d_shader_instruction *ins = &program->instructions.elements[i]; size_t j; for (j = 0; j < ins->dst_count; ++j) - materialize_ssas_to_temps_process_dst_param(parser, &ins->dst[j]); + materialize_ssas_to_temps_process_dst_param(program, &ins->dst[j]); for (j = 0; j < ins->src_count; ++j) - materialize_ssas_to_temps_process_src_param(parser, &ins->src[j]); + materialize_ssas_to_temps_process_src_param(program, &ins->src[j]); switch (ins->handler_idx) { @@ -2815,9 +2847,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p { const struct vkd3d_shader_src_param *source; - source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); - if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, - &parser->program.instructions.elements[j].dst[0], NULL, source, false)) + source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], + current_label); + if (!materialize_ssas_to_temps_synthesize_mov(program, 
&instructions[ins_count], + &ins->location, &program->instructions.elements[j].dst[0], NULL, source, false)) goto fail; ++ins_count; @@ -2837,9 +2870,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p { const struct vkd3d_shader_src_param *source; - source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); - if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, - &parser->program.instructions.elements[j].dst[0], cond, source, false)) + source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], + current_label); + if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], + &ins->location, &program->instructions.elements[j].dst[0], cond, source, false)) goto fail; ++ins_count; @@ -2849,9 +2883,10 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p { const struct vkd3d_shader_src_param *source; - source = materialize_ssas_to_temps_compute_source(&parser->program.instructions.elements[j], current_label); - if (!materialize_ssas_to_temps_synthesize_mov(parser, &instructions[ins_count], &ins->location, - &parser->program.instructions.elements[j].dst[0], cond, source, true)) + source = materialize_ssas_to_temps_compute_source(&program->instructions.elements[j], + current_label); + if (!materialize_ssas_to_temps_synthesize_mov(program, &instructions[ins_count], + &ins->location, &program->instructions.elements[j].dst[0], cond, source, true)) goto fail; ++ins_count; @@ -2873,13 +2908,13 @@ static enum vkd3d_result materialize_ssas_to_temps(struct vkd3d_shader_parser *p instructions[ins_count++] = *ins; } - vkd3d_free(parser->program.instructions.elements); + vkd3d_free(program->instructions.elements); vkd3d_free(block_index); - parser->program.instructions.elements = instructions; - parser->program.instructions.capacity = ins_capacity; - 
parser->program.instructions.count = ins_count; - parser->program.temp_count += parser->program.ssa_count; - parser->program.ssa_count = 0; + program->instructions.elements = instructions; + program->instructions.capacity = ins_capacity; + program->instructions.count = ins_count; + program->temp_count += program->ssa_count; + program->ssa_count = 0; return VKD3D_OK; @@ -2890,125 +2925,6 @@ fail: return VKD3D_ERROR_OUT_OF_MEMORY; } -static enum vkd3d_result simple_structurizer_run(struct vkd3d_shader_parser *parser) -{ - const unsigned int block_temp_idx = parser->program.temp_count; - struct vkd3d_shader_instruction *instructions = NULL; - const struct vkd3d_shader_location no_loc = {0}; - size_t ins_capacity = 0, ins_count = 0, i; - bool first_label_found = false; - - if (!reserve_instructions(&instructions, &ins_capacity, parser->program.instructions.count)) - goto fail; - - for (i = 0; i < parser->program.instructions.count; ++i) - { - struct vkd3d_shader_instruction *ins = &parser->program.instructions.elements[i]; - - switch (ins->handler_idx) - { - case VKD3DSIH_PHI: - case VKD3DSIH_SWITCH_MONOLITHIC: - vkd3d_unreachable(); - - case VKD3DSIH_LABEL: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 4)) - goto fail; - - if (!first_label_found) - { - first_label_found = true; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_LOOP, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_SWITCH, 0, 1)) - goto fail; - src_param_init_temp_uint(&instructions[ins_count].src[0], block_temp_idx); - 
ins_count++; - } - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_CASE, 0, 1)) - goto fail; - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - break; - - case VKD3DSIH_BRANCH: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 2)) - goto fail; - - if (vsir_register_is_label(&ins->src[0].reg)) - { - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOV, 1, 1)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - src_param_init_const_uint(&instructions[ins_count].src[0], label_from_src_param(&ins->src[0])); - ins_count++; - } - else - { - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_MOVC, 1, 3)) - goto fail; - dst_param_init_temp_uint(&instructions[ins_count].dst[0], block_temp_idx); - instructions[ins_count].src[0] = ins->src[0]; - src_param_init_const_uint(&instructions[ins_count].src[1], label_from_src_param(&ins->src[1])); - src_param_init_const_uint(&instructions[ins_count].src[2], label_from_src_param(&ins->src[2])); - ins_count++; - } - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_BREAK, 0, 0)) - goto fail; - ins_count++; - break; - - case VKD3DSIH_RET: - default: - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 1)) - goto fail; - - instructions[ins_count++] = *ins; - break; - } - } - - assert(first_label_found); - - if (!reserve_instructions(&instructions, &ins_capacity, ins_count + 3)) - goto fail; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDSWITCH, 0, 0)) - goto fail; - ins_count++; - - if (!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_ENDLOOP, 0, 0)) - goto fail; - ins_count++; - - if 
(!vsir_instruction_init_with_params(&parser->program, &instructions[ins_count], &no_loc, VKD3DSIH_RET, 0, 0)) - goto fail; - ins_count++; - - vkd3d_free(parser->program.instructions.elements); - parser->program.instructions.elements = instructions; - parser->program.instructions.capacity = ins_capacity; - parser->program.instructions.count = ins_count; - parser->program.temp_count += 1; - - return VKD3D_OK; - -fail: - vkd3d_free(instructions); - return VKD3D_ERROR_OUT_OF_MEMORY; -} - struct vsir_block_list { struct vsir_block **blocks; @@ -3025,14 +2941,8 @@ static void vsir_block_list_cleanup(struct vsir_block_list *list) vkd3d_free(list->blocks); } -static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +static enum vkd3d_result vsir_block_list_add_checked(struct vsir_block_list *list, struct vsir_block *block) { - size_t i; - - for (i = 0; i < list->count; ++i) - if (block == list->blocks[i]) - return VKD3D_OK; - if (!vkd3d_array_reserve((void **)&list->blocks, &list->capacity, list->count + 1, sizeof(*list->blocks))) { ERR("Cannot extend block list.\n"); @@ -3044,9 +2954,27 @@ static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struc return VKD3D_OK; } +static enum vkd3d_result vsir_block_list_add(struct vsir_block_list *list, struct vsir_block *block) +{ + size_t i; + + for (i = 0; i < list->count; ++i) + if (block == list->blocks[i]) + return VKD3D_FALSE; + + return vsir_block_list_add_checked(list, block); +} + +/* It is guaranteed that the relative order is kept. */ +static void vsir_block_list_remove_index(struct vsir_block_list *list, size_t idx) +{ + --list->count; + memmove(&list->blocks[idx], &list->blocks[idx + 1], (list->count - idx) * sizeof(*list->blocks)); +} + struct vsir_block { - unsigned int label; + unsigned int label, order_pos; /* `begin' points to the instruction immediately following the * LABEL that introduces the block. 
`end' points to the terminator * instruction (either BRANCH or RET). They can coincide, meaning @@ -3089,12 +3017,163 @@ static void vsir_block_cleanup(struct vsir_block *block) vkd3d_free(block->dominates); } +static int block_compare(const void *ptr1, const void *ptr2) +{ + const struct vsir_block *block1 = *(const struct vsir_block **)ptr1; + const struct vsir_block *block2 = *(const struct vsir_block **)ptr2; + + return vkd3d_u32_compare(block1->label, block2->label); +} + +static void vsir_block_list_sort(struct vsir_block_list *list) +{ + qsort(list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + +static bool vsir_block_list_search(struct vsir_block_list *list, struct vsir_block *block) +{ + return !!bsearch(&block, list->blocks, list->count, sizeof(*list->blocks), block_compare); +} + +struct vsir_cfg_structure_list +{ + struct vsir_cfg_structure *structures; + size_t count, capacity; + unsigned int end; +}; + +struct vsir_cfg_structure +{ + enum vsir_cfg_structure_type + { + /* Execute a block of the original VSIR program. */ + STRUCTURE_TYPE_BLOCK, + /* Execute a loop, which is identified by an index. */ + STRUCTURE_TYPE_LOOP, + /* Execute a `return' or a (possibly) multilevel `break' or + * `continue', targeting a loop by its index. If `condition' + * is non-NULL, then the jump is conditional (this is + * currently not allowed for `return'). */ + STRUCTURE_TYPE_JUMP, + } type; + union + { + struct vsir_block *block; + struct + { + struct vsir_cfg_structure_list body; + unsigned idx; + } loop; + struct + { + enum vsir_cfg_jump_type + { + /* NONE is available as an intermediate value, but it + * is not allowed in valid structured programs. 
*/ + JUMP_NONE, + JUMP_BREAK, + JUMP_CONTINUE, + JUMP_RET, + } type; + unsigned int target; + struct vkd3d_shader_src_param *condition; + bool invert_condition; + } jump; + } u; +}; + +static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type); +static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure); + +static void vsir_cfg_structure_list_cleanup(struct vsir_cfg_structure_list *list) +{ + unsigned int i; + + for (i = 0; i < list->count; ++i) + vsir_cfg_structure_cleanup(&list->structures[i]); + vkd3d_free(list->structures); +} + +static struct vsir_cfg_structure *vsir_cfg_structure_list_append(struct vsir_cfg_structure_list *list, + enum vsir_cfg_structure_type type) +{ + struct vsir_cfg_structure *ret; + + if (!vkd3d_array_reserve((void **)&list->structures, &list->capacity, list->count + 1, + sizeof(*list->structures))) + return NULL; + + ret = &list->structures[list->count++]; + + vsir_cfg_structure_init(ret, type); + + return ret; +} + +static void vsir_cfg_structure_init(struct vsir_cfg_structure *structure, enum vsir_cfg_structure_type type) +{ + memset(structure, 0, sizeof(*structure)); + structure->type = type; +} + +static void vsir_cfg_structure_cleanup(struct vsir_cfg_structure *structure) +{ + if (structure->type == STRUCTURE_TYPE_LOOP) + vsir_cfg_structure_list_cleanup(&structure->u.loop.body); +} + struct vsir_cfg { + struct vkd3d_shader_message_context *message_context; struct vsir_program *program; struct vsir_block *blocks; struct vsir_block *entry; size_t block_count; + struct vkd3d_string_buffer debug_buffer; + + struct vsir_block_list *loops; + size_t loops_count, loops_capacity; + size_t *loops_by_header; + + struct vsir_block_list order; + struct cfg_loop_interval + { + /* `begin' is the position of the first block of the loop in + * the topological sort; `end' is the position of the first + * block after the loop. 
In other words, `begin' is where a + * `continue' instruction would jump and `end' is where a + * `break' instruction would jump. */ + unsigned int begin, end; + /* Each loop interval can be natural or synthetic. Natural + * intervals are added to represent loops given by CFG back + * edges. Synthetic intervals do not correspond to loops in + * the input CFG, but are added to leverage their `break' + * instruction in order to execute forward edges. + * + * For a synthetic loop interval it's not really important + * which one is the `begin' block, since we don't need to + * execute `continue' for them. So we have some leeway for + * moving it provided that these conditions are met: 1. the + * interval must contain all `break' instructions that target + * it, which in practice means that `begin' can be moved + * backward and not forward; 2. intervals must remain properly + * nested (for each pair of intervals, either one contains the + * other or they are disjoint). + * + * Subject to these conditions, we try to reuse the same loop + * as much as possible (if many forward edges target the same + * block), but we still try to keep `begin' as forward as + * possible, to keep the loop scope as small as possible. 
*/ + bool synthetic; + } *loop_intervals; + size_t loop_interval_count, loop_interval_capacity; + + struct vsir_cfg_structure_list structured_program; + + struct vkd3d_shader_instruction *instructions; + size_t ins_capacity, ins_count; + unsigned int jump_target_temp_idx; + unsigned int temp_count; }; static void vsir_cfg_cleanup(struct vsir_cfg *cfg) @@ -3104,7 +3183,43 @@ static void vsir_cfg_cleanup(struct vsir_cfg *cfg) for (i = 0; i < cfg->block_count; ++i) vsir_block_cleanup(&cfg->blocks[i]); + for (i = 0; i < cfg->loops_count; ++i) + vsir_block_list_cleanup(&cfg->loops[i]); + + vsir_block_list_cleanup(&cfg->order); + + vsir_cfg_structure_list_cleanup(&cfg->structured_program); + vkd3d_free(cfg->blocks); + vkd3d_free(cfg->loops); + vkd3d_free(cfg->loops_by_header); + vkd3d_free(cfg->loop_intervals); + + if (TRACE_ON()) + vkd3d_string_buffer_cleanup(&cfg->debug_buffer); +} + +static enum vkd3d_result vsir_cfg_add_loop_interval(struct vsir_cfg *cfg, unsigned int begin, + unsigned int end, bool synthetic) +{ + struct cfg_loop_interval *interval; + + if (!vkd3d_array_reserve((void **)&cfg->loop_intervals, &cfg->loop_interval_capacity, + cfg->loop_interval_count + 1, sizeof(*cfg->loop_intervals))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + interval = &cfg->loop_intervals[cfg->loop_interval_count++]; + + interval->begin = begin; + interval->end = end; + interval->synthetic = synthetic; + + return VKD3D_OK; +} + +static bool vsir_block_dominates(struct vsir_block *b1, struct vsir_block *b2) +{ + return bitmap_is_set(b1->dominates, b2->label - 1); } static enum vkd3d_result vsir_cfg_add_edge(struct vsir_cfg *cfg, struct vsir_block *block, @@ -3162,19 +3277,96 @@ static void vsir_cfg_dump_dot(struct vsir_cfg *cfg) TRACE("}\n"); } -static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program) +static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list); + +static void vsir_cfg_structure_dump(struct 
vsir_cfg *cfg, struct vsir_cfg_structure *structure) +{ + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + TRACE("%sblock %u\n", cfg->debug_buffer.buffer, structure->u.block->label); + break; + + case STRUCTURE_TYPE_LOOP: + TRACE("%s%u : loop {\n", cfg->debug_buffer.buffer, structure->u.loop.idx); + + vsir_cfg_structure_list_dump(cfg, &structure->u.loop.body); + + TRACE("%s} # %u\n", cfg->debug_buffer.buffer, structure->u.loop.idx); + break; + + case STRUCTURE_TYPE_JUMP: + { + const char *type_str; + + switch (structure->u.jump.type) + { + case JUMP_RET: + TRACE("%sret\n", cfg->debug_buffer.buffer); + return; + + case JUMP_BREAK: + type_str = "break"; + break; + + case JUMP_CONTINUE: + type_str = "continue"; + break; + + default: + vkd3d_unreachable(); + } + + TRACE("%s%s%s %u\n", cfg->debug_buffer.buffer, type_str, + structure->u.jump.condition ? "c" : "", structure->u.jump.target); + break; + } + + default: + vkd3d_unreachable(); + } +} + +static void vsir_cfg_structure_list_dump(struct vsir_cfg *cfg, struct vsir_cfg_structure_list *list) +{ + unsigned int i; + + vkd3d_string_buffer_printf(&cfg->debug_buffer, " "); + + for (i = 0; i < list->count; ++i) + vsir_cfg_structure_dump(cfg, &list->structures[i]); + + vkd3d_string_buffer_truncate(&cfg->debug_buffer, cfg->debug_buffer.content_size - 2); +} + +static void vsir_cfg_dump_structured_program(struct vsir_cfg *cfg) +{ + unsigned int i; + + for (i = 0; i < cfg->structured_program.count; ++i) + vsir_cfg_structure_dump(cfg, &cfg->structured_program.structures[i]); +} + +static enum vkd3d_result vsir_cfg_init(struct vsir_cfg *cfg, struct vsir_program *program, + struct vkd3d_shader_message_context *message_context) { struct vsir_block *current_block = NULL; enum vkd3d_result ret; size_t i; memset(cfg, 0, sizeof(*cfg)); + cfg->message_context = message_context; cfg->program = program; cfg->block_count = program->block_count; + vsir_block_list_init(&cfg->order); + if (!(cfg->blocks = 
vkd3d_calloc(cfg->block_count, sizeof(*cfg->blocks)))) return VKD3D_ERROR_OUT_OF_MEMORY; + if (TRACE_ON()) + vkd3d_string_buffer_init(&cfg->debug_buffer); + for (i = 0; i < program->instructions.count; ++i) { struct vkd3d_shader_instruction *instruction = &program->instructions.elements[i]; @@ -3285,12 +3477,8 @@ static void vsir_cfg_compute_dominators_recurse(struct vsir_block *current, stru static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) { - struct vkd3d_string_buffer buf; size_t i, j; - if (TRACE_ON()) - vkd3d_string_buffer_init(&buf); - for (i = 0; i < cfg->block_count; ++i) { struct vsir_block *block = &cfg->blocks[i]; @@ -3302,7 +3490,7 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) if (TRACE_ON()) { - vkd3d_string_buffer_printf(&buf, "Block %u dominates:", block->label); + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block %u dominates:", block->label); for (j = 0; j < cfg->block_count; j++) { struct vsir_block *block2 = &cfg->blocks[j]; @@ -3310,46 +3498,952 @@ static void vsir_cfg_compute_dominators(struct vsir_cfg *cfg) if (block2->label == 0) continue; - if (bitmap_is_set(block->dominates, j)) - vkd3d_string_buffer_printf(&buf, " %u", block2->label); + if (vsir_block_dominates(block, block2)) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", block2->label); } - TRACE("%s\n", buf.buffer); - vkd3d_string_buffer_clear(&buf); + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); } } - - if (TRACE_ON()) - vkd3d_string_buffer_cleanup(&buf); } -enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info) +/* A back edge is an edge X -> Y for which block Y dominates block + * X. All the other edges are forward edges, and it is required that + * the input CFG is reducible, i.e., it is acyclic once you strip away + * the back edges. 
+ * + * Each back edge X -> Y defines a loop: block X is the back edge block, + * block Y is the header block, and the loop consists of all the + * blocks which are dominated by the header block and have a path to + * the back edge block that doesn't pass through the header block + * (including the header block itself). It can be proved that all the + * blocks in such a path (connecting a loop block to the back edge + * block without passing through the header block) belong to the same + * loop. + * + * If the input CFG is reducible its loops are properly nested (i.e., + * each two loops are either disjoint or one is contained in the + * other), provided that each block has at most one incoming back + * edge. If this condition does not hold, a synthetic block can be + * introduced as the only back edge block for the given header block, + * with all the previous back edges now being forward edges to the + * synthetic block. This is not currently implemented (but it is + * rarely found in practice anyway).
*/ +static enum vkd3d_result vsir_cfg_scan_loop(struct vsir_block_list *loop, struct vsir_block *block, + struct vsir_block *header) { - struct vkd3d_shader_instruction_array *instructions = &parser->program.instructions; - enum vkd3d_result result = VKD3D_OK; + enum vkd3d_result ret; + size_t i; - remove_dcl_temps(&parser->program); + if ((ret = vsir_block_list_add(loop, block)) < 0) + return ret; - if ((result = instruction_array_lower_texkills(parser)) < 0) - return result; + if (ret == VKD3D_FALSE || block == header) + return VKD3D_OK; - if (parser->shader_desc.is_dxil) + for (i = 0; i < block->predecessors.count; ++i) { - struct vsir_cfg cfg; + if ((ret = vsir_cfg_scan_loop(loop, block->predecessors.blocks[i], header)) < 0) + return ret; + } - if ((result = lower_switch_to_if_ladder(&parser->program)) < 0) - return result; + return VKD3D_OK; +} - if ((result = materialize_ssas_to_temps(parser)) < 0) - return result; +static enum vkd3d_result vsir_cfg_compute_loops(struct vsir_cfg *cfg) +{ + size_t i, j, k; - if ((result = vsir_cfg_init(&cfg, &parser->program)) < 0) - return result; + if (!(cfg->loops_by_header = vkd3d_calloc(cfg->block_count, sizeof(*cfg->loops_by_header)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + memset(cfg->loops_by_header, 0xff, cfg->block_count * sizeof(*cfg->loops_by_header)); - vsir_cfg_compute_dominators(&cfg); + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; - if ((result = simple_structurizer_run(parser)) < 0) - { + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *header = block->successors.blocks[j]; + struct vsir_block_list *loop; + enum vkd3d_result ret; + + /* Is this a back edge? 
*/ + if (!vsir_block_dominates(header, block)) + continue; + + if (!vkd3d_array_reserve((void **)&cfg->loops, &cfg->loops_capacity, cfg->loops_count + 1, sizeof(*cfg->loops))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + loop = &cfg->loops[cfg->loops_count]; + vsir_block_list_init(loop); + + if ((ret = vsir_cfg_scan_loop(loop, block, header)) < 0) + return ret; + + vsir_block_list_sort(loop); + + if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Back edge %u -> %u with loop:", block->label, header->label); + + for (k = 0; k < loop->count; ++k) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", loop->blocks[k]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + + if (cfg->loops_by_header[header->label - 1] != SIZE_MAX) + { + FIXME("Block %u is header to more than one loop, this is not implemented.\n", header->label); + vkd3d_shader_error(cfg->message_context, &header->begin->location, VKD3D_SHADER_ERROR_VSIR_NOT_IMPLEMENTED, + "Block %u is header to more than one loop, this is not implemented.", header->label); + return VKD3D_ERROR_NOT_IMPLEMENTED; + } + + cfg->loops_by_header[header->label - 1] = cfg->loops_count; + + ++cfg->loops_count; + } + } + + return VKD3D_OK; +} + +struct vsir_cfg_node_sorter +{ + struct vsir_cfg *cfg; + struct vsir_cfg_node_sorter_stack_item + { + struct vsir_block_list *loop; + unsigned int seen_count; + unsigned int begin; + } *stack; + size_t stack_count, stack_capacity; + struct vsir_block_list available_blocks; +}; + +static enum vkd3d_result vsir_cfg_node_sorter_make_node_available(struct vsir_cfg_node_sorter *sorter, struct vsir_block *block) +{ + struct vsir_block_list *loop = NULL; + struct vsir_cfg_node_sorter_stack_item *item; + enum vkd3d_result ret; + + if (sorter->cfg->loops_by_header[block->label - 1] != SIZE_MAX) + loop = &sorter->cfg->loops[sorter->cfg->loops_by_header[block->label - 1]]; + + if ((ret = 
vsir_block_list_add_checked(&sorter->available_blocks, block)) < 0) + return ret; + + if (!loop) + return VKD3D_OK; + + if (!vkd3d_array_reserve((void **)&sorter->stack, &sorter->stack_capacity, sorter->stack_count + 1, sizeof(*sorter->stack))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + item = &sorter->stack[sorter->stack_count++]; + item->loop = loop; + item->seen_count = 0; + item->begin = sorter->cfg->order.count; + + return VKD3D_OK; +} + +/* Topologically sort the blocks according to the forward edges. By + * definition if the input CFG is reducible then its forward edges + * form a DAG, so a topological sorting exists. In order to compute it + * we keep an array with the incoming degree for each block and an + * available list of all the blocks whose incoming degree has reached + * zero. At each step we pick a block from the available list and + * strip it away from the graph, updating the incoming degrees and + * available list. + * + * In principle at each step we can pick whatever node we want from + * the available list, and will get a topological sort + * anyway. However, we use these two criteria to give to the computed + * order additional properties: + * + * 1. we keep track of which loops we're into, and pick blocks + * belonging to the current innermost loop, so that loops are kept + * contiguous in the order; this can always be done when the input + * CFG is reducible; + * + * 2. subject to the requirement above, we always pick the most + * recently added block to the available list, because this tends + * to keep related blocks and require fewer control flow + * primitives. 
+ */ +static enum vkd3d_result vsir_cfg_sort_nodes(struct vsir_cfg *cfg) +{ + struct vsir_cfg_node_sorter sorter = { .cfg = cfg }; + unsigned int *in_degrees = NULL; + enum vkd3d_result ret; + size_t i; + + if (!(in_degrees = vkd3d_calloc(cfg->block_count, sizeof(*in_degrees)))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + { + in_degrees[i] = UINT_MAX; + continue; + } + + in_degrees[i] = block->predecessors.count; + + /* Do not count back edges. */ + if (cfg->loops_by_header[i] != SIZE_MAX) + { + assert(in_degrees[i] > 0); + in_degrees[i] -= 1; + } + + if (in_degrees[i] == 0 && block != cfg->entry) + { + WARN("Unexpected entry point %u.\n", block->label); + vkd3d_shader_error(cfg->message_context, &block->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "Block %u is unreachable from the entry point.", block->label); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + } + + if (in_degrees[cfg->entry->label - 1] != 0) + { + WARN("Entry point has %u incoming forward edges.\n", in_degrees[cfg->entry->label - 1]); + vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "The entry point block has %u incoming forward edges.", in_degrees[cfg->entry->label - 1]); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + vsir_block_list_init(&sorter.available_blocks); + + if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, cfg->entry)) < 0) + goto fail; + + while (sorter.available_blocks.count != 0) + { + struct vsir_cfg_node_sorter_stack_item *inner_stack_item = NULL; + struct vsir_block *block; + size_t new_seen_count; + + if (sorter.stack_count != 0) + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + for (i = sorter.available_blocks.count - 1; ; --i) + { + if (i == SIZE_MAX) + { + ERR("Couldn't find any viable next block, is the input CFG reducible?\n"); + 
ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + block = sorter.available_blocks.blocks[i]; + + if (!inner_stack_item || vsir_block_list_search(inner_stack_item->loop, block)) + break; + } + + vsir_block_list_remove_index(&sorter.available_blocks, i); + block->order_pos = cfg->order.count; + if ((ret = vsir_block_list_add_checked(&cfg->order, block)) < 0) + goto fail; + + /* Close loops: since each loop is a strict subset of any + * outer loop, we just need to track how many blocks we've + * seen; when I close a loop I mark the same number of seen + * blocks for the next outer loop. */ + new_seen_count = 1; + while (sorter.stack_count != 0) + { + inner_stack_item = &sorter.stack[sorter.stack_count - 1]; + + inner_stack_item->seen_count += new_seen_count; + + assert(inner_stack_item->seen_count <= inner_stack_item->loop->count); + if (inner_stack_item->seen_count != inner_stack_item->loop->count) + break; + + if ((ret = vsir_cfg_add_loop_interval(cfg, inner_stack_item->begin, + cfg->order.count, false)) < 0) + goto fail; + + new_seen_count = inner_stack_item->loop->count; + --sorter.stack_count; + } + + /* Remove (forward) edges and make new nodes available. */ + for (i = 0; i < block->successors.count; ++i) + { + struct vsir_block *successor = block->successors.blocks[i]; + + if (vsir_block_dominates(successor, block)) + continue; + + assert(in_degrees[successor->label - 1] > 0); + --in_degrees[successor->label - 1]; + + if (in_degrees[successor->label - 1] == 0) + { + if ((ret = vsir_cfg_node_sorter_make_node_available(&sorter, successor)) < 0) + goto fail; + } + } + } + + if (cfg->order.count != cfg->block_count) + { + /* There is a cycle of forward edges. 
*/ + WARN("The control flow graph is not reducible.\n"); + vkd3d_shader_error(cfg->message_context, &cfg->entry->begin->location, VKD3D_SHADER_ERROR_VSIR_INVALID_CONTROL_FLOW, + "The control flow graph is not reducible."); + ret = VKD3D_ERROR_INVALID_SHADER; + goto fail; + } + + assert(sorter.stack_count == 0); + + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); + vsir_block_list_cleanup(&sorter.available_blocks); + + if (TRACE_ON()) + { + vkd3d_string_buffer_printf(&cfg->debug_buffer, "Block order:"); + + for (i = 0; i < cfg->order.count; ++i) + vkd3d_string_buffer_printf(&cfg->debug_buffer, " %u", cfg->order.blocks[i]->label); + + TRACE("%s\n", cfg->debug_buffer.buffer); + vkd3d_string_buffer_clear(&cfg->debug_buffer); + } + + return VKD3D_OK; + +fail: + vkd3d_free(in_degrees); + vkd3d_free(sorter.stack); + vsir_block_list_cleanup(&sorter.available_blocks); + + return ret; +} + +/* Sort loop intervals first by ascending begin time and then by + * descending end time, so that inner intervals appear after outer + * ones and disjoint intervals appear in their proper order. 
*/ +static int compare_loop_intervals(const void *ptr1, const void *ptr2) +{ + const struct cfg_loop_interval *interval1 = ptr1; + const struct cfg_loop_interval *interval2 = ptr2; + + if (interval1->begin != interval2->begin) + return vkd3d_u32_compare(interval1->begin, interval2->begin); + + return -vkd3d_u32_compare(interval1->end, interval2->end); +} + +static enum vkd3d_result vsir_cfg_generate_synthetic_loop_intervals(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i, j, k; + + for (i = 0; i < cfg->block_count; ++i) + { + struct vsir_block *block = &cfg->blocks[i]; + + if (block->label == 0) + continue; + + for (j = 0; j < block->successors.count; ++j) + { + struct vsir_block *successor = block->successors.blocks[j]; + struct cfg_loop_interval *extend = NULL; + unsigned int begin; + enum + { + ACTION_DO_NOTHING, + ACTION_CREATE_NEW, + ACTION_EXTEND, + } action = ACTION_CREATE_NEW; + + /* We've already constructed loop intervals for the back + * edges, there's nothing more to do. */ + if (vsir_block_dominates(successor, block)) + continue; + + assert(block->order_pos < successor->order_pos); + + /* Jumping from a block to the following one is always + * possible, so nothing to do. */ + if (block->order_pos + 1 == successor->order_pos) + continue; + + /* Let's look for a loop interval that already breaks at + * `successor' and either contains or can be extended to + * contain `block'. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->end != successor->order_pos) + continue; + + if (interval->begin <= block->order_pos) + { + action = ACTION_DO_NOTHING; + break; + } + + if (interval->synthetic) + { + action = ACTION_EXTEND; + extend = interval; + break; + } + } + + if (action == ACTION_DO_NOTHING) + continue; + + /* Ok, we have to decide where the new or replacing + * interval has to begin. These are the rules: 1. it must + * begin before `block'; 2.
intervals must be properly + * nested; 3. the new interval should begin as late as + * possible, to limit control flow depth and extension. */ + begin = block->order_pos; + + /* Our candidate interval is always [begin, + * successor->order_pos), and we move `begin' backward + * until the candidate interval contains all the intervals + * whose endpoint lies in the candidate interval + * itself. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (begin < interval->end && interval->end < successor->order_pos) + begin = min(begin, interval->begin); + } + + /* Now we have to care about the intervals whose begin + * point lies in the candidate interval. We cannot move + * the candidate interval endpoint, because it is + * important that the loop break target matches + * `successor'. So we have to move that interval's begin + * point to the begin point of the candidate interval, + * i.e. `begin'. But what if the interval we should extend + * backward is not synthetic? This cannot happen, + * fortunately, because it would mean that there is a jump + * entering a loop via a block which is not the loop + * header, so the CFG would not be reducible. */ + for (k = 0; k < cfg->loop_interval_count; ++k) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[k]; + + if (interval->begin < successor->order_pos && successor->order_pos < interval->end) + { + if (interval->synthetic) + interval->begin = min(begin, interval->begin); + assert(begin >= interval->begin); + } + } + + if (action == ACTION_EXTEND) + extend->begin = begin; + else if ((ret = vsir_cfg_add_loop_interval(cfg, begin, successor->order_pos, true)) < 0) + return ret; + } + } + + qsort(cfg->loop_intervals, cfg->loop_interval_count, sizeof(*cfg->loop_intervals), compare_loop_intervals); + + if (TRACE_ON()) + for (i = 0; i < cfg->loop_interval_count; ++i) + TRACE("%s loop interval %u - %u\n", cfg->loop_intervals[i].synthetic ?
"Synthetic" : "Natural", + cfg->loop_intervals[i].begin, cfg->loop_intervals[i].end); + + return VKD3D_OK; +} + +struct vsir_cfg_edge_action +{ + enum vsir_cfg_jump_type jump_type; + unsigned int target; + struct vsir_block *successor; +}; + +static void vsir_cfg_compute_edge_action(struct vsir_cfg *cfg, struct vsir_block *block, + struct vsir_block *successor, struct vsir_cfg_edge_action *action) +{ + unsigned int i; + + action->target = UINT_MAX; + action->successor = successor; + + if (successor->order_pos <= block->order_pos) + { + /* The successor is before the current block, so we have to + * use `continue'. The target loop is the innermost that + * contains the current block and has the successor as + * `continue' target. */ + for (i = 0; i < cfg->loop_interval_count; ++i) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; + + if (interval->begin == successor->order_pos && block->order_pos < interval->end) + action->target = i; + + if (interval->begin > successor->order_pos) + break; + } + + assert(action->target != UINT_MAX); + action->jump_type = JUMP_CONTINUE; + } + else + { + /* The successor is after the current block, so we have to use + * `break', or possibly just jump to the following block. The + * target loop is the outermost that contains the current + * block and has the successor as `break' target. 
*/ + for (i = 0; i < cfg->loop_interval_count; ++i) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[i]; + + if (interval->begin <= block->order_pos && interval->end == successor->order_pos) + { + action->target = i; + break; + } + } + + if (action->target == UINT_MAX) + { + assert(successor->order_pos == block->order_pos + 1); + action->jump_type = JUMP_NONE; + } + else + { + action->jump_type = JUMP_BREAK; + } + } +} + +static enum vkd3d_result vsir_cfg_build_structured_program(struct vsir_cfg *cfg) +{ + unsigned int i, stack_depth = 1, open_interval_idx = 0; + struct vsir_cfg_structure_list **stack = NULL; + + /* It's enough to allocate up to the maximum interval stacking + * depth (plus one for the full program), but this is simpler. */ + if (!(stack = vkd3d_calloc(cfg->loop_interval_count + 1, sizeof(*stack)))) + goto fail; + cfg->structured_program.end = cfg->order.count; + stack[0] = &cfg->structured_program; + + for (i = 0; i < cfg->order.count; ++i) + { + struct vsir_block *block = cfg->order.blocks[i]; + struct vsir_cfg_structure *structure; + + assert(stack_depth > 0); + + /* Open loop intervals. */ + while (open_interval_idx < cfg->loop_interval_count) + { + struct cfg_loop_interval *interval = &cfg->loop_intervals[open_interval_idx]; + + if (interval->begin != i) + break; + + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_LOOP))) + goto fail; + structure->u.loop.idx = open_interval_idx++; + + structure->u.loop.body.end = interval->end; + stack[stack_depth++] = &structure->u.loop.body; + } + + /* Execute the block. */ + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_BLOCK))) + goto fail; + structure->u.block = block; + + /* Generate between zero and two jump instructions. 
*/ + switch (block->end->handler_idx) + { + case VKD3DSIH_BRANCH: + { + struct vsir_cfg_edge_action action_true, action_false; + bool invert_condition = false; + + if (vsir_register_is_label(&block->end->src[0].reg)) + { + unsigned int target = label_from_src_param(&block->end->src[0]); + struct vsir_block *successor = &cfg->blocks[target - 1]; + + vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); + action_false = action_true; + } + else + { + unsigned int target = label_from_src_param(&block->end->src[1]); + struct vsir_block *successor = &cfg->blocks[target - 1]; + + vsir_cfg_compute_edge_action(cfg, block, successor, &action_true); + + target = label_from_src_param(&block->end->src[2]); + successor = &cfg->blocks[target - 1]; + + vsir_cfg_compute_edge_action(cfg, block, successor, &action_false); + } + + /* This will happen if the branch is unconditional, + * but also if it's conditional with the same target + * in both branches, which can happen in some corner + * cases, e.g. when converting switch instructions to + * selection ladders. */ + if (action_true.successor == action_false.successor) + { + assert(action_true.jump_type == action_false.jump_type); + } + else + { + /* At most one branch can just fall through to the + * next block, in which case we make sure it's the + * false branch. 
*/ + if (action_true.jump_type == JUMP_NONE) + { + struct vsir_cfg_edge_action tmp = action_true; + action_true = action_false; + action_false = tmp; + invert_condition = true; + } + + assert(action_true.jump_type != JUMP_NONE); + + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) + goto fail; + structure->u.jump.type = action_true.jump_type; + structure->u.jump.target = action_true.target; + structure->u.jump.condition = &block->end->src[0]; + structure->u.jump.invert_condition = invert_condition; + } + + if (action_false.jump_type != JUMP_NONE) + { + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) + goto fail; + structure->u.jump.type = action_false.jump_type; + structure->u.jump.target = action_false.target; + } + break; + } + + case VKD3DSIH_RET: + if (!(structure = vsir_cfg_structure_list_append(stack[stack_depth - 1], STRUCTURE_TYPE_JUMP))) + goto fail; + structure->u.jump.type = JUMP_RET; + break; + + default: + vkd3d_unreachable(); + } + + /* Close loop intervals. 
*/ + while (stack_depth > 0) + { + if (stack[stack_depth - 1]->end != i + 1) + break; + + --stack_depth; + } + } + + assert(stack_depth == 0); + assert(open_interval_idx == cfg->loop_interval_count); + + if (TRACE_ON()) + vsir_cfg_dump_structured_program(cfg); + + vkd3d_free(stack); + + return VKD3D_OK; + +fail: + vkd3d_free(stack); + + return VKD3D_ERROR_OUT_OF_MEMORY; +} + +static enum vkd3d_result vsir_cfg_structure_list_emit(struct vsir_cfg *cfg, + struct vsir_cfg_structure_list *list, unsigned int loop_idx) +{ + const struct vkd3d_shader_location no_loc = {0}; + enum vkd3d_result ret; + size_t i; + + for (i = 0; i < list->count; ++i) + { + struct vsir_cfg_structure *structure = &list->structures[i]; + + switch (structure->type) + { + case STRUCTURE_TYPE_BLOCK: + { + struct vsir_block *block = structure->u.block; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + (block->end - block->begin))) + return VKD3D_ERROR_OUT_OF_MEMORY; + + memcpy(&cfg->instructions[cfg->ins_count], block->begin, (char *)block->end - (char *)block->begin); + + cfg->ins_count += block->end - block->begin; + break; + } + + case STRUCTURE_TYPE_LOOP: + { + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_LOOP); + + if ((ret = vsir_cfg_structure_list_emit(cfg, &structure->u.loop.body, structure->u.loop.idx)) < 0) + return ret; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 5)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + vsir_instruction_init(&cfg->instructions[cfg->ins_count++], &no_loc, VKD3DSIH_ENDLOOP); + + /* Add a trampoline to implement multilevel jumping depending on the stored + * jump_target value. 
*/ + if (loop_idx != UINT_MAX) + { + /* If the multilevel jump is a `continue' and the target is the loop we're inside + * right now, then we can finally do the `continue'. */ + const unsigned int outer_continue_target = loop_idx << 1 | 1; + /* If the multilevel jump is a `continue' to any other target, or if it is a `break' + * and the target is not the loop we just finished emitting, then it means that + * we have to reach an outer loop, so we keep breaking. */ + const unsigned int inner_break_target = structure->u.loop.idx << 1; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); + src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], outer_continue_target); + + ++cfg->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_CONTINUEP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + + ++cfg->ins_count; + ++cfg->temp_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_IEQ, 1, 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_bool(&cfg->instructions[cfg->ins_count].dst[0], cfg->temp_count); + src_param_init_temp_uint(&cfg->instructions[cfg->ins_count].src[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[1], inner_break_target); + + ++cfg->ins_count; + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_BREAKP, 0, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + cfg->instructions[cfg->ins_count].flags |= 
VKD3D_SHADER_CONDITIONAL_OP_Z; + + src_param_init_temp_bool(&cfg->instructions[cfg->ins_count].src[0], cfg->temp_count); + + ++cfg->ins_count; + ++cfg->temp_count; + } + + break; + } + + case STRUCTURE_TYPE_JUMP: + { + /* Encode the jump target as the loop index plus a bit to remember whether + * we're breaking or continuing. */ + unsigned int jump_target = structure->u.jump.target << 1; + enum vkd3d_shader_opcode opcode; + + switch (structure->u.jump.type) + { + case JUMP_CONTINUE: + /* If we're continuing the loop we're directly inside, then we can emit a + * `continue'. Otherwise we first have to break all the loops between here + * and the loop to continue, recording our intention to continue + * in the lowest bit of jump_target. */ + if (structure->u.jump.target == loop_idx) + { + opcode = structure->u.jump.condition ? VKD3DSIH_CONTINUEP : VKD3DSIH_CONTINUE; + break; + } + jump_target |= 1; + /* fall through */ + + case JUMP_BREAK: + opcode = structure->u.jump.condition ? VKD3DSIH_BREAKP : VKD3DSIH_BREAK; + break; + + case JUMP_RET: + assert(!structure->u.jump.condition); + opcode = VKD3DSIH_RET; + break; + + default: + vkd3d_unreachable(); + } + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->ins_count + 2)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (opcode == VKD3DSIH_BREAK || opcode == VKD3DSIH_BREAKP) + { + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, VKD3DSIH_MOV, 1, 1)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + dst_param_init_temp_uint(&cfg->instructions[cfg->ins_count].dst[0], cfg->jump_target_temp_idx); + src_param_init_const_uint(&cfg->instructions[cfg->ins_count].src[0], jump_target); + + ++cfg->ins_count; + } + + if (!vsir_instruction_init_with_params(cfg->program, &cfg->instructions[cfg->ins_count], + &no_loc, opcode, 0, !!structure->u.jump.condition)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + if (structure->u.jump.invert_condition) +
cfg->instructions[cfg->ins_count].flags |= VKD3D_SHADER_CONDITIONAL_OP_Z; + + if (structure->u.jump.condition) + cfg->instructions[cfg->ins_count].src[0] = *structure->u.jump.condition; + + ++cfg->ins_count; + break; + } + + default: + vkd3d_unreachable(); + } + } + + return VKD3D_OK; +} + +static enum vkd3d_result vsir_cfg_emit_structured_program(struct vsir_cfg *cfg) +{ + enum vkd3d_result ret; + size_t i; + + cfg->jump_target_temp_idx = cfg->program->temp_count; + cfg->temp_count = cfg->program->temp_count + 1; + + if (!reserve_instructions(&cfg->instructions, &cfg->ins_capacity, cfg->program->instructions.count)) + return VKD3D_ERROR_OUT_OF_MEMORY; + + /* Copy declarations until the first block. */ + for (i = 0; i < cfg->program->instructions.count; ++i) + { + struct vkd3d_shader_instruction *ins = &cfg->program->instructions.elements[i]; + + if (ins->handler_idx == VKD3DSIH_LABEL) + break; + + cfg->instructions[cfg->ins_count++] = *ins; + } + + if ((ret = vsir_cfg_structure_list_emit(cfg, &cfg->structured_program, UINT_MAX)) < 0) + goto fail; + + vkd3d_free(cfg->program->instructions.elements); + cfg->program->instructions.elements = cfg->instructions; + cfg->program->instructions.capacity = cfg->ins_capacity; + cfg->program->instructions.count = cfg->ins_count; + cfg->program->temp_count = cfg->temp_count; + + return VKD3D_OK; + +fail: + vkd3d_free(cfg->instructions); + + return ret; +} + +enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context) +{ + enum vkd3d_result result = VKD3D_OK; + + remove_dcl_temps(program); + + if ((result = vsir_program_lower_texkills(program)) < 0) + return result; + + if (program->shader_version.major >= 6) + { + struct vsir_cfg cfg; + + if ((result = lower_switch_to_if_ladder(program)) < 0) + return result; + + if ((result = vsir_program_materialise_ssas_to_temps(program)) < 0) + 
return result; + + if ((result = vsir_cfg_init(&cfg, program, message_context)) < 0) + return result; + + vsir_cfg_compute_dominators(&cfg); + + if ((result = vsir_cfg_compute_loops(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + + if ((result = vsir_cfg_sort_nodes(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + + if ((result = vsir_cfg_generate_synthetic_loop_intervals(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + + if ((result = vsir_cfg_build_structured_program(&cfg)) < 0) + { + vsir_cfg_cleanup(&cfg); + return result; + } + + if ((result = vsir_cfg_emit_structured_program(&cfg)) < 0) + { vsir_cfg_cleanup(&cfg); return result; } @@ -3358,55 +4452,55 @@ enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, } else { - if (parser->program.shader_version.type != VKD3D_SHADER_TYPE_PIXEL) + if (program->shader_version.type != VKD3D_SHADER_TYPE_PIXEL) { - if ((result = remap_output_signature(parser, compile_info)) < 0) + if ((result = vsir_program_remap_output_signature(program, compile_info, message_context)) < 0) return result; } - if (parser->program.shader_version.type == VKD3D_SHADER_TYPE_HULL) + if (program->shader_version.type == VKD3D_SHADER_TYPE_HULL) { - if ((result = instruction_array_flatten_hull_shader_phases(instructions)) < 0) + if ((result = instruction_array_flatten_hull_shader_phases(&program->instructions)) < 0) return result; - if ((result = instruction_array_normalise_hull_shader_control_point_io(instructions, - &parser->shader_desc.input_signature)) < 0) + if ((result = instruction_array_normalise_hull_shader_control_point_io(&program->instructions, + &program->input_signature)) < 0) return result; } - if ((result = shader_normalise_io_registers(parser)) < 0) + if ((result = vsir_program_normalise_io_registers(program)) < 0) return result; - if ((result = instruction_array_normalise_flat_constants(&parser->program)) < 0) + if ((result = 
instruction_array_normalise_flat_constants(program)) < 0) return result; - remove_dead_code(&parser->program); + remove_dead_code(program); - if ((result = normalise_combined_samplers(parser)) < 0) + if ((result = vsir_program_normalise_combined_samplers(program, message_context)) < 0) return result; } - if ((result = flatten_control_flow_constructs(parser)) < 0) + if ((result = vsir_program_flatten_control_flow_constructs(program, message_context)) < 0) return result; if (TRACE_ON()) - vkd3d_shader_trace(&parser->program); + vkd3d_shader_trace(program); - if (!parser->failed && (result = vsir_validate(parser)) < 0) + if ((result = vsir_program_validate(program, config_flags, + compile_info->source_name, message_context)) < 0) return result; - if (parser->failed) - result = VKD3D_ERROR_INVALID_SHADER; - return result; } struct validation_context { - struct vkd3d_shader_parser *parser; + struct vkd3d_shader_message_context *message_context; const struct vsir_program *program; size_t instruction_idx; + struct vkd3d_shader_location null_location; bool invalid_instruction_idx; + enum vkd3d_result status; bool dcl_temps_found; enum vkd3d_shader_opcode phase; enum cf_type @@ -3452,16 +4546,21 @@ static void VKD3D_PRINTF_FUNC(3, 4) validator_error(struct validation_context *c if (ctx->invalid_instruction_idx) { - vkd3d_shader_parser_error(ctx->parser, error, "%s", buf.buffer); + vkd3d_shader_error(ctx->message_context, &ctx->null_location, error, "%s", buf.buffer); ERR("VSIR validation error: %s\n", buf.buffer); } else { - vkd3d_shader_parser_error(ctx->parser, error, "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); + const struct vkd3d_shader_instruction *ins = &ctx->program->instructions.elements[ctx->instruction_idx]; + vkd3d_shader_error(ctx->message_context, &ins->location, error, + "instruction %zu: %s", ctx->instruction_idx + 1, buf.buffer); ERR("VSIR validation error: instruction %zu: %s\n", ctx->instruction_idx + 1, buf.buffer); } 
vkd3d_string_buffer_cleanup(&buf); + + if (!ctx->status) + ctx->status = VKD3D_ERROR_INVALID_SHADER; } static void vsir_validate_src_param(struct validation_context *ctx, @@ -3515,10 +4614,10 @@ static void vsir_validate_register(struct validation_context *ctx, if (reg->idx[0].rel_addr) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "Non-NULL relative address for a TEMP register."); - if (reg->idx[0].offset >= ctx->parser->program.temp_count) + if (reg->idx[0].offset >= ctx->program->temp_count) { validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_INDEX, "TEMP register index %u exceeds the maximum count %u.", - reg->idx[0].offset, ctx->parser->program.temp_count); + reg->idx[0].offset, ctx->program->temp_count); break; } @@ -3606,7 +4705,7 @@ static void vsir_validate_register(struct validation_context *ctx, validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_PRECISION, "Invalid precision %#x for a LABEL register.", reg->precision); - if (reg->data_type != VKD3D_DATA_UINT) + if (reg->data_type != VKD3D_DATA_UNUSED) validator_error(ctx, VKD3D_SHADER_ERROR_VSIR_INVALID_DATA_TYPE, "Invalid data type %#x for a LABEL register.", reg->data_type); @@ -3708,7 +4807,7 @@ static void vsir_validate_dst_param(struct validation_context *ctx, switch (dst->reg.type) { case VKD3DSPR_SSA: - if (dst->reg.idx[0].offset < ctx->parser->program.ssa_count) + if (dst->reg.idx[0].offset < ctx->program->ssa_count) { struct validation_context_ssa_data *data = &ctx->ssas[dst->reg.idx[0].offset]; @@ -3761,7 +4860,7 @@ static void vsir_validate_src_param(struct validation_context *ctx, switch (src->reg.type) { case VKD3DSPR_SSA: - if (src->reg.idx[0].offset < ctx->parser->program.ssa_count) + if (src->reg.idx[0].offset < ctx->program->ssa_count) { struct validation_context_ssa_data *data = &ctx->ssas[src->reg.idx[0].offset]; unsigned int i; @@ -3852,7 +4951,6 @@ static void vsir_validate_instruction(struct validation_context *ctx) size_t i; instruction = 
&ctx->program->instructions.elements[ctx->instruction_idx]; - ctx->parser->location = instruction->location; for (i = 0; i < instruction->dst_count; ++i) vsir_validate_dst_param(ctx, &instruction->dst[i]); @@ -4203,17 +5301,20 @@ static void vsir_validate_instruction(struct validation_context *ctx) } } -enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) +enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, + const char *source_name, struct vkd3d_shader_message_context *message_context) { struct validation_context ctx = { - .parser = parser, - .program = &parser->program, + .message_context = message_context, + .program = program, + .null_location = {.source_name = source_name}, + .status = VKD3D_OK, .phase = VKD3DSIH_INVALID, }; unsigned int i; - if (!(parser->config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) + if (!(config_flags & VKD3D_SHADER_CONFIG_FLAG_FORCE_VALIDATION)) return VKD3D_OK; if (!(ctx.temps = vkd3d_calloc(ctx.program->temp_count, sizeof(*ctx.temps)))) @@ -4222,7 +5323,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) if (!(ctx.ssas = vkd3d_calloc(ctx.program->ssa_count, sizeof(*ctx.ssas)))) goto fail; - for (ctx.instruction_idx = 0; ctx.instruction_idx < parser->program.instructions.count; ++ctx.instruction_idx) + for (ctx.instruction_idx = 0; ctx.instruction_idx < program->instructions.count; ++ctx.instruction_idx) vsir_validate_instruction(&ctx); ctx.invalid_instruction_idx = true; @@ -4247,7 +5348,7 @@ enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser) vkd3d_free(ctx.temps); vkd3d_free(ctx.ssas); - return VKD3D_OK; + return ctx.status; fail: vkd3d_free(ctx.blocks); diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c index 5c87ff15503..673400efd69 100644 --- a/libs/vkd3d/libs/vkd3d-shader/spirv.c +++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c @@ -223,6 +223,11 @@ enum vkd3d_shader_input_sysval_semantic 
vkd3d_siv_from_sysval_indexed(enum vkd3d } } +static bool data_type_is_floating_point(enum vkd3d_data_type data_type) +{ + return data_type == VKD3D_DATA_HALF || data_type == VKD3D_DATA_FLOAT || data_type == VKD3D_DATA_DOUBLE; +} + #define VKD3D_SPIRV_VERSION 0x00010000 #define VKD3D_SPIRV_GENERATOR_ID 18 #define VKD3D_SPIRV_GENERATOR_VERSION 11 @@ -1524,6 +1529,19 @@ static uint32_t vkd3d_spirv_build_op_logical_equal(struct vkd3d_spirv_builder *b SpvOpLogicalEqual, result_type, operand0, operand1); } +static uint32_t vkd3d_spirv_build_op_logical_or(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand0, uint32_t operand1) +{ + return vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, + SpvOpLogicalOr, result_type, operand0, operand1); +} + +static uint32_t vkd3d_spirv_build_op_logical_not(struct vkd3d_spirv_builder *builder, + uint32_t result_type, uint32_t operand) +{ + return vkd3d_spirv_build_op_tr1(builder, &builder->function_stream, SpvOpLogicalNot, result_type, operand); +} + static uint32_t vkd3d_spirv_build_op_convert_utof(struct vkd3d_spirv_builder *builder, uint32_t result_type, uint32_t unsigned_value) { @@ -1825,6 +1843,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_SNORM: case VKD3D_DATA_UNORM: @@ -1832,6 +1851,7 @@ static uint32_t vkd3d_spirv_get_type_id_for_data_type(struct vkd3d_spirv_builder break; case VKD3D_DATA_INT: case VKD3D_DATA_UINT: + case VKD3D_DATA_UINT16: /* Minimum precision. 
TODO: native 16-bit */ return vkd3d_spirv_get_op_type_int(builder, 32, data_type == VKD3D_DATA_INT); break; case VKD3D_DATA_DOUBLE: @@ -1940,6 +1960,9 @@ static bool vkd3d_spirv_compile_module(struct vkd3d_spirv_builder *builder, || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStorageImageArrayDynamicIndexing) || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderNonUniformEXT)) vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_descriptor_indexing"); + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderPixelInterlockEXT) + || vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityFragmentShaderSampleInterlockEXT)) + vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_fragment_shader_interlock"); if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityStencilExportEXT)) vkd3d_spirv_build_op_extension(&stream, "SPV_EXT_shader_stencil_export"); if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilityShaderViewportIndexLayerEXT)) @@ -2346,6 +2369,7 @@ struct spirv_compiler unsigned int output_control_point_count; bool use_vocp; + bool use_invocation_interlock; bool emit_point_size; enum vkd3d_shader_opcode phase; @@ -2427,14 +2451,14 @@ static void spirv_compiler_destroy(struct spirv_compiler *compiler) vkd3d_free(compiler); } -static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_version *shader_version, - struct vkd3d_shader_desc *shader_desc, const struct vkd3d_shader_compile_info *compile_info, +static struct spirv_compiler *spirv_compiler_create(const struct vsir_program *program, + const struct vkd3d_shader_compile_info *compile_info, const struct vkd3d_shader_scan_descriptor_info1 *scan_descriptor_info, struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location, uint64_t config_flags) { - const struct shader_signature *patch_constant_signature = &shader_desc->patch_constant_signature; - const struct shader_signature *output_signature = 
&shader_desc->output_signature; + const struct shader_signature *patch_constant_signature = &program->patch_constant_signature; + const struct shader_signature *output_signature = &program->output_signature; const struct vkd3d_shader_interface_info *shader_interface; const struct vkd3d_shader_descriptor_offset_info *offset_info; const struct vkd3d_shader_spirv_target_info *target_info; @@ -2545,7 +2569,7 @@ static struct spirv_compiler *spirv_compiler_create(const struct vkd3d_shader_ve rb_init(&compiler->symbol_table, vkd3d_symbol_compare); - compiler->shader_type = shader_version->type; + compiler->shader_type = program->shader_version.type; if ((shader_interface = vkd3d_find_struct(compile_info->next, INTERFACE_INFO))) { @@ -3736,6 +3760,70 @@ static uint32_t spirv_compiler_emit_bool_to_double(struct spirv_compiler *compil return vkd3d_spirv_build_op_select(builder, type_id, val_id, true_id, false_id); } +/* Based on the implementation in the OpenGL Mathematics library. */ +static uint32_t half_to_float(uint16_t value) +{ + uint32_t s = (value & 0x8000u) << 16; + uint32_t e = (value >> 10) & 0x1fu; + uint32_t m = value & 0x3ffu; + + if (!e) + { + if (!m) + { + /* Plus or minus zero */ + return s; + } + else + { + /* Denormalized number -- renormalize it */ + + while (!(m & 0x400u)) + { + m <<= 1; + --e; + } + + ++e; + m &= ~0x400u; + } + } + else if (e == 31u) + { + /* Positive or negative infinity for zero 'm'. + * Nan for non-zero 'm' -- preserve sign and significand bits */ + return s | 0x7f800000u | (m << 13); + } + + /* Normalized number */ + e += 127u - 15u; + m <<= 13; + + /* Assemble s, e and m. */ + return s | (e << 23) | m; +} + +static uint32_t convert_raw_constant32(enum vkd3d_data_type data_type, unsigned int uint_value) +{ + int16_t i; + + /* TODO: native 16-bit support. 
*/ + if (data_type != VKD3D_DATA_UINT16 && data_type != VKD3D_DATA_HALF) + return uint_value; + + if (data_type == VKD3D_DATA_HALF) + return half_to_float(uint_value); + + /* Values in DXIL have no signedness, so it is ambiguous whether 16-bit constants should or + * should not be sign-extended when 16-bit execution is not supported. The AMD RX 580 Windows + * driver has no 16-bit support, and sign-extends all 16-bit constant ints to 32 bits. These + * results differ from SM 5. The RX 6750 XT supports 16-bit execution, so constants are not + * extended, and results match SM 5. It seems best to replicate the sign-extension, and if + * execution is 16-bit, the values will be truncated. */ + i = uint_value; + return (int32_t)i; +} + static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compiler, const struct vkd3d_shader_register *reg, uint32_t swizzle, uint32_t write_mask) { @@ -3748,14 +3836,15 @@ static uint32_t spirv_compiler_emit_load_constant(struct spirv_compiler *compile if (reg->dimension == VSIR_DIMENSION_SCALAR) { for (i = 0; i < component_count; ++i) - values[i] = *reg->u.immconst_u32; + values[i] = convert_raw_constant32(reg->data_type, reg->u.immconst_u32[0]); } else { for (i = 0, j = 0; i < VKD3D_VEC4_SIZE; ++i) { if (write_mask & (VKD3DSP_WRITEMASK_0 << i)) - values[j++] = reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]; + values[j++] = convert_raw_constant32(reg->data_type, + reg->u.immconst_u32[vsir_swizzle_get_component(swizzle, i)]); } } @@ -3899,6 +3988,13 @@ static uint32_t spirv_compiler_emit_constant_array(struct spirv_compiler *compil switch (icb->data_type) { + case VKD3D_DATA_HALF: + case VKD3D_DATA_UINT16: + /* Scalar only. 
*/ + for (i = 0; i < element_count; ++i) + elements[i] = vkd3d_spirv_get_op_constant(builder, elem_type_id, + convert_raw_constant32(icb->data_type, icb->data[i])); + break; case VKD3D_DATA_FLOAT: case VKD3D_DATA_INT: case VKD3D_DATA_UINT: @@ -4087,7 +4183,7 @@ static uint32_t spirv_compiler_emit_abs(struct spirv_compiler *compiler, uint32_t type_id; type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_fabs(builder, type_id, val_id); FIXME("Unhandled data type %#x.\n", reg->data_type); @@ -4101,7 +4197,7 @@ static uint32_t spirv_compiler_emit_neg(struct spirv_compiler *compiler, uint32_t type_id; type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_fnegate(builder, type_id, val_id); else if (data_type_is_integer(reg->data_type)) return vkd3d_spirv_build_op_snegate(builder, type_id, val_id); @@ -4285,7 +4381,7 @@ static uint32_t spirv_compiler_emit_sat(struct spirv_compiler *compiler, } type_id = spirv_compiler_get_type_id_for_reg(compiler, reg, write_mask); - if (reg->data_type == VKD3D_DATA_FLOAT || reg->data_type == VKD3D_DATA_DOUBLE) + if (data_type_is_floating_point(reg->data_type)) return vkd3d_spirv_build_op_glsl_std450_nclamp(builder, type_id, val_id, zero_id, one_id); FIXME("Unhandled data type %#x.\n", reg->data_type); @@ -6272,9 +6368,24 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp if (!(d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_READ)) vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationNonReadable, NULL, 0); - if (d->uav_flags & VKD3DSUF_GLOBALLY_COHERENT) + /* ROVs are implicitly globally coherent. 
*/ + if (d->uav_flags & (VKD3DSUF_GLOBALLY_COHERENT | VKD3DSUF_RASTERISER_ORDERED_VIEW)) vkd3d_spirv_build_op_decorate(builder, var_id, SpvDecorationCoherent, NULL, 0); + if (d->uav_flags & VKD3DSUF_RASTERISER_ORDERED_VIEW) + { + if (compiler->shader_type != VKD3D_SHADER_TYPE_PIXEL) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "Rasteriser-ordered views are only supported in fragment shaders."); + else if (!spirv_compiler_is_target_extension_supported(compiler, + VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK)) + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_UNSUPPORTED_FEATURE, + "Cannot enable fragment shader interlock. " + "The target environment does not support fragment shader interlock."); + else + compiler->use_invocation_interlock = true; + } + if (d->flags & VKD3D_SHADER_DESCRIPTOR_INFO_FLAG_UAV_COUNTER) { assert(structure_stride); /* counters are valid only for structured buffers */ @@ -6324,20 +6435,26 @@ static void spirv_compiler_emit_resource_declaration(struct spirv_compiler *comp } static void spirv_compiler_emit_workgroup_memory(struct spirv_compiler *compiler, - const struct vkd3d_shader_register *reg, unsigned int size, unsigned int structure_stride) + const struct vkd3d_shader_register *reg, unsigned int alignment, unsigned int size, + unsigned int structure_stride, bool zero_init) { - uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id; + uint32_t type_id, array_type_id, length_id, pointer_type_id, var_id, init_id; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; const SpvStorageClass storage_class = SpvStorageClassWorkgroup; struct vkd3d_symbol reg_symbol; + /* Alignment is supported only in the Kernel execution model. 
*/ + if (alignment) + TRACE("Ignoring alignment %u.\n", alignment); + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_UINT, 1); length_id = spirv_compiler_get_constant_uint(compiler, size); array_type_id = vkd3d_spirv_get_op_type_array(builder, type_id, length_id); pointer_type_id = vkd3d_spirv_get_op_type_pointer(builder, storage_class, array_type_id); + init_id = zero_init ? vkd3d_spirv_get_op_constant_null(builder, array_type_id) : 0; var_id = vkd3d_spirv_build_op_variable(builder, &builder->global_stream, - pointer_type_id, storage_class, 0); + pointer_type_id, storage_class, init_id); spirv_compiler_emit_register_debug_name(builder, var_id, reg); @@ -6352,8 +6469,8 @@ static void spirv_compiler_emit_dcl_tgsm_raw(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { const struct vkd3d_shader_tgsm_raw *tgsm_raw = &instruction->declaration.tgsm_raw; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, - tgsm_raw->byte_count / 4, 0); + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_raw->reg.reg, tgsm_raw->alignment, + tgsm_raw->byte_count / 4, 0, tgsm_raw->zero_init); } static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compiler, @@ -6361,8 +6478,8 @@ static void spirv_compiler_emit_dcl_tgsm_structured(struct spirv_compiler *compi { const struct vkd3d_shader_tgsm_structured *tgsm_structured = &instruction->declaration.tgsm_structured; unsigned int stride = tgsm_structured->byte_stride / 4; - spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, - tgsm_structured->structure_count * stride, stride); + spirv_compiler_emit_workgroup_memory(compiler, &tgsm_structured->reg.reg, tgsm_structured->alignment, + tgsm_structured->structure_count * stride, stride, tgsm_structured->zero_init); } static void spirv_compiler_emit_dcl_input(struct spirv_compiler *compiler, @@ -6871,7 +6988,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler 
*compiler, assert(src->reg.data_type == VKD3D_DATA_BOOL && dst->reg.data_type != VKD3D_DATA_BOOL); val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); - if (dst->reg.data_type == VKD3D_DATA_FLOAT) + if (dst->reg.data_type == VKD3D_DATA_HALF || dst->reg.data_type == VKD3D_DATA_FLOAT) { val_id = spirv_compiler_emit_bool_to_float(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } @@ -6880,7 +6997,7 @@ static void spirv_compiler_emit_bool_cast(struct spirv_compiler *compiler, /* ITOD is not supported. Frontends which emit bool casts must use ITOF for double. */ val_id = spirv_compiler_emit_bool_to_double(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOF); } - else if (dst->reg.data_type == VKD3D_DATA_UINT) + else if (dst->reg.data_type == VKD3D_DATA_UINT16 || dst->reg.data_type == VKD3D_DATA_UINT) { val_id = spirv_compiler_emit_bool_to_int(compiler, 1, val_id, instruction->handler_idx == VKD3DSIH_ITOI); } @@ -6909,6 +7026,15 @@ static enum vkd3d_result spirv_compiler_emit_alu_instruction(struct spirv_compil SpvOp op = SpvOpMax; unsigned int i; + if (src->reg.data_type == VKD3D_DATA_UINT64 && instruction->handler_idx == VKD3DSIH_COUNTBITS) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. 
*/ + FIXME("Unsupported 64-bit source for bit count.\n"); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "64-bit source for bit count is not supported."); + return VKD3D_ERROR_INVALID_SHADER; + } + if (src->reg.data_type == VKD3D_DATA_BOOL) { if (dst->reg.data_type == VKD3D_DATA_BOOL) @@ -7049,6 +7175,16 @@ static void spirv_compiler_emit_ext_glsl_instruction(struct spirv_compiler *comp unsigned int i, component_count; enum GLSLstd450 glsl_inst; + if (src[0].reg.data_type == VKD3D_DATA_UINT64 && (instruction->handler_idx == VKD3DSIH_FIRSTBIT_HI + || instruction->handler_idx == VKD3DSIH_FIRSTBIT_LO || instruction->handler_idx == VKD3DSIH_FIRSTBIT_SHI)) + { + /* At least some drivers support this anyway, but if validation is enabled it will fail. */ + FIXME("Unsupported 64-bit source for handler %#x.\n", instruction->handler_idx); + spirv_compiler_error(compiler, VKD3D_SHADER_ERROR_SPV_NOT_IMPLEMENTED, + "64-bit source for handler %#x is not supported.", instruction->handler_idx); + return; + } + glsl_inst = spirv_compiler_map_ext_glsl_instruction(instruction); if (glsl_inst == GLSLstd450Bad) { @@ -7093,8 +7229,8 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, struct vkd3d_shader_register_info dst_reg_info, src_reg_info; const struct vkd3d_shader_dst_param *dst = instruction->dst; const struct vkd3d_shader_src_param *src = instruction->src; + unsigned int i, component_count, write_mask; uint32_t components[VKD3D_VEC4_SIZE]; - unsigned int i, component_count; if (register_is_constant_or_undef(&src->reg) || src->reg.type == VKD3DSPR_SSA || dst->reg.type == VKD3DSPR_SSA || dst->modifiers || src->modifiers) @@ -7145,7 +7281,9 @@ static void spirv_compiler_emit_mov(struct spirv_compiler *compiler, } general_implementation: - val_id = spirv_compiler_emit_load_src(compiler, src, dst->write_mask); + write_mask = (src->reg.type == VKD3DSPR_IMMCONST64 && !data_type_is_64_bit(dst->reg.data_type)) + ? 
vsir_write_mask_64_from_32(dst->write_mask) : dst->write_mask; + val_id = spirv_compiler_emit_load_src(compiler, src, write_mask); if (dst->reg.data_type != src->reg.data_type) { val_id = vkd3d_spirv_build_op_bitcast(builder, vkd3d_spirv_get_type_id_for_data_type(builder, @@ -7171,8 +7309,15 @@ static void spirv_compiler_emit_movc(struct spirv_compiler *compiler, type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); if (src[0].reg.data_type != VKD3D_DATA_BOOL) - condition_id = spirv_compiler_emit_int_to_bool(compiler, - VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); + { + if (instruction->handler_idx == VKD3DSIH_CMP) + condition_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, SpvOpFOrdGreaterThanEqual, + vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, component_count), condition_id, + spirv_compiler_get_constant_float_vector(compiler, 0.0f, component_count)); + else + condition_id = spirv_compiler_emit_int_to_bool(compiler, + VKD3D_SHADER_CONDITIONAL_OP_NZ, src[0].reg.data_type, component_count, condition_id); + } val_id = vkd3d_spirv_build_op_select(builder, type_id, condition_id, src1_id, src2_id); spirv_compiler_emit_store_dst(compiler, dst, val_id); @@ -7335,7 +7480,7 @@ static void spirv_compiler_emit_imad(struct spirv_compiler *compiler, unsigned int i, component_count; component_count = vsir_write_mask_component_count(dst->write_mask); - type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_INT, component_count); + type_id = vkd3d_spirv_get_type_id_for_data_type(builder, dst->reg.data_type, component_count); for (i = 0; i < ARRAY_SIZE(src_ids); ++i) src_ids[i] = spirv_compiler_emit_load_src(compiler, &src[i], dst->write_mask); @@ -7684,6 +7829,56 @@ static void spirv_compiler_emit_comparison_instruction(struct spirv_compiler *co spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); } +static void 
spirv_compiler_emit_orderedness_instruction(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t type_id, src0_id, src1_id, val_id; + + type_id = spirv_compiler_get_type_id_for_dst(compiler, dst); + src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + /* OpOrdered and OpUnordered are only available in Kernel mode. */ + src0_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src0_id); + src1_id = vkd3d_spirv_build_op_is_nan(builder, type_id, src1_id); + val_id = vkd3d_spirv_build_op_logical_or(builder, type_id, src0_id, src1_id); + if (instruction->handler_idx == VKD3DSIH_ORD) + val_id = vkd3d_spirv_build_op_logical_not(builder, type_id, val_id); + spirv_compiler_emit_store_dst(compiler, dst, val_id); +} + +static void spirv_compiler_emit_float_comparison_instruction(struct spirv_compiler *compiler, + const struct vkd3d_shader_instruction *instruction) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + const struct vkd3d_shader_dst_param *dst = instruction->dst; + const struct vkd3d_shader_src_param *src = instruction->src; + uint32_t src0_id, src1_id, type_id, result_id; + unsigned int component_count; + SpvOp op; + + switch (instruction->handler_idx) + { + case VKD3DSIH_SLT: op = SpvOpFOrdLessThan; break; + case VKD3DSIH_SGE: op = SpvOpFOrdGreaterThanEqual; break; + default: + vkd3d_unreachable(); + } + + component_count = vsir_write_mask_component_count(dst->write_mask); + + src0_id = spirv_compiler_emit_load_src(compiler, &src[0], dst->write_mask); + src1_id = spirv_compiler_emit_load_src(compiler, &src[1], dst->write_mask); + + type_id = vkd3d_spirv_get_type_id(builder, VKD3D_SHADER_COMPONENT_BOOL, 
component_count); + result_id = vkd3d_spirv_build_op_tr2(builder, &builder->function_stream, op, type_id, src0_id, src1_id); + + result_id = spirv_compiler_emit_bool_to_float(compiler, component_count, result_id, false); + spirv_compiler_emit_store_reg(compiler, &dst->reg, dst->write_mask, result_id); +} + static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction, uint32_t target_block_id) { @@ -7702,11 +7897,31 @@ static uint32_t spirv_compiler_emit_conditional_branch(struct spirv_compiler *co return merge_block_id; } +static void spirv_compiler_end_invocation_interlock(struct spirv_compiler *compiler) +{ + struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + + if (vkd3d_spirv_capability_is_enabled(builder, SpvCapabilitySampleRateShading)) + { + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModeSampleInterlockOrderedEXT, NULL, 0); + vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderSampleInterlockEXT); + } + else + { + spirv_compiler_emit_execution_mode(compiler, SpvExecutionModePixelInterlockOrderedEXT, NULL, 0); + vkd3d_spirv_enable_capability(builder, SpvCapabilityFragmentShaderPixelInterlockEXT); + } + vkd3d_spirv_build_op(&builder->function_stream, SpvOpEndInvocationInterlockEXT); +} + static void spirv_compiler_emit_return(struct spirv_compiler *compiler, const struct vkd3d_shader_instruction *instruction) { struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; + if (compiler->use_invocation_interlock) + spirv_compiler_end_invocation_interlock(compiler); + if (compiler->shader_type != VKD3D_SHADER_TYPE_GEOMETRY && (is_in_default_phase(compiler) || is_in_control_point_phase(compiler))) spirv_compiler_emit_shader_epilogue_invocation(compiler); @@ -9475,6 +9690,11 @@ static void spirv_compiler_emit_main_prolog(struct spirv_compiler *compiler) if (compiler->emit_point_size) spirv_compiler_emit_point_size(compiler); + + /* Maybe in 
the future we can try to shrink the size of the interlocked + * section. */ + if (compiler->use_invocation_interlock) + vkd3d_spirv_build_op(&compiler->spirv_builder.function_stream, SpvOpBeginInvocationInterlockEXT); } static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, @@ -9549,6 +9769,7 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, break; case VKD3DSIH_DMOVC: case VKD3DSIH_MOVC: + case VKD3DSIH_CMP: spirv_compiler_emit_movc(compiler, instruction); break; case VKD3DSIH_SWAPC: @@ -9669,6 +9890,14 @@ static int spirv_compiler_handle_instruction(struct spirv_compiler *compiler, case VKD3DSIH_ULT: spirv_compiler_emit_comparison_instruction(compiler, instruction); break; + case VKD3DSIH_ORD: + case VKD3DSIH_UNO: + spirv_compiler_emit_orderedness_instruction(compiler, instruction); + break; + case VKD3DSIH_SLT: + case VKD3DSIH_SGE: + spirv_compiler_emit_float_comparison_instruction(compiler, instruction); + break; case VKD3DSIH_BFI: case VKD3DSIH_IBFE: case VKD3DSIH_UBFE: @@ -9899,13 +10128,13 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, const struct vkd3d_shader_spirv_target_info *info = compiler->spirv_target_info; const struct vkd3d_shader_spirv_domain_shader_target_info *ds_info; struct vkd3d_spirv_builder *builder = &compiler->spirv_builder; - struct vkd3d_shader_desc *shader_desc = &parser->shader_desc; struct vkd3d_shader_instruction_array instructions; struct vsir_program *program = &parser->program; enum vkd3d_result result = VKD3D_OK; unsigned int i; - if ((result = vkd3d_shader_normalise(parser, compile_info)) < 0) + if ((result = vsir_program_normalise(program, compiler->config_flags, + compile_info, compiler->message_context)) < 0) return result; if (program->temp_count) @@ -9924,12 +10153,12 @@ static int spirv_compiler_generate_spirv(struct spirv_compiler *compiler, instructions = program->instructions; memset(&program->instructions, 0, 
sizeof(program->instructions)); - compiler->input_signature = shader_desc->input_signature; - compiler->output_signature = shader_desc->output_signature; - compiler->patch_constant_signature = shader_desc->patch_constant_signature; - memset(&shader_desc->input_signature, 0, sizeof(shader_desc->input_signature)); - memset(&shader_desc->output_signature, 0, sizeof(shader_desc->output_signature)); - memset(&shader_desc->patch_constant_signature, 0, sizeof(shader_desc->patch_constant_signature)); + compiler->input_signature = program->input_signature; + compiler->output_signature = program->output_signature; + compiler->patch_constant_signature = program->patch_constant_signature; + memset(&program->input_signature, 0, sizeof(program->input_signature)); + memset(&program->output_signature, 0, sizeof(program->output_signature)); + memset(&program->patch_constant_signature, 0, sizeof(program->patch_constant_signature)); compiler->use_vocp = program->use_vocp; compiler->block_names = program->block_names; compiler->block_name_count = program->block_name_count; @@ -10036,8 +10265,8 @@ int spirv_compile(struct vkd3d_shader_parser *parser, struct spirv_compiler *spirv_compiler; int ret; - if (!(spirv_compiler = spirv_compiler_create(&parser->program.shader_version, &parser->shader_desc, - compile_info, scan_descriptor_info, message_context, &parser->location, parser->config_flags))) + if (!(spirv_compiler = spirv_compiler_create(&parser->program, compile_info, + scan_descriptor_info, message_context, &parser->location, parser->config_flags))) { ERR("Failed to create SPIR-V compiler.\n"); return VKD3D_ERROR; diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c index 3be4e40ab0c..cb4f6d4ddbf 100644 --- a/libs/vkd3d/libs/vkd3d-shader/tpf.c +++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c @@ -954,32 +954,32 @@ static void shader_sm4_read_dcl_index_range(struct vkd3d_shader_instruction *ins case VKD3DSPR_INCONTROLPOINT: io_masks = 
priv->input_register_masks; ranges = &priv->input_index_ranges; - signature = &priv->p.shader_desc.input_signature; + signature = &priv->p.program.input_signature; break; case VKD3DSPR_OUTPUT: if (sm4_parser_is_in_fork_or_join_phase(priv)) { io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; - signature = &priv->p.shader_desc.patch_constant_signature; + signature = &priv->p.program.patch_constant_signature; } else { io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; - signature = &priv->p.shader_desc.output_signature; + signature = &priv->p.program.output_signature; } break; case VKD3DSPR_COLOROUT: case VKD3DSPR_OUTCONTROLPOINT: io_masks = priv->output_register_masks; ranges = &priv->output_index_ranges; - signature = &priv->p.shader_desc.output_signature; + signature = &priv->p.program.output_signature; break; case VKD3DSPR_PATCHCONST: io_masks = priv->patch_constant_register_masks; ranges = &priv->patch_constant_index_ranges; - signature = &priv->p.shader_desc.patch_constant_signature; + signature = &priv->p.program.patch_constant_signature; break; default: @@ -1113,7 +1113,7 @@ static void shader_sm4_read_dcl_input_ps(struct vkd3d_shader_instruction *ins, u if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); + &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); e->interpolation_mode = ins->flags; } @@ -1128,7 +1128,7 @@ static void shader_sm4_read_dcl_input_ps_siv(struct vkd3d_shader_instruction *in if (shader_sm4_read_dst_param(priv, &tokens, &tokens[token_count], VKD3D_DATA_FLOAT, dst)) { struct signature_element *e = vsir_signature_find_element_for_reg( - &priv->p.shader_desc.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, 
dst->write_mask); + &priv->p.program.input_signature, dst->reg.idx[dst->reg.idx_count - 1].offset, dst->write_mask); e->interpolation_mode = ins->flags; } @@ -1263,6 +1263,7 @@ static void shader_sm5_read_dcl_tgsm_raw(struct vkd3d_shader_instruction *ins, u ins->declaration.tgsm_raw.byte_count = *tokens; if (ins->declaration.tgsm_raw.byte_count % 4) FIXME("Byte count %u is not multiple of 4.\n", ins->declaration.tgsm_raw.byte_count); + ins->declaration.tgsm_raw.zero_init = false; } static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1274,6 +1275,7 @@ static void shader_sm5_read_dcl_tgsm_structured(struct vkd3d_shader_instruction ins->declaration.tgsm_structured.structure_count = *tokens; if (ins->declaration.tgsm_structured.byte_stride % 4) FIXME("Byte stride %u is not multiple of 4.\n", ins->declaration.tgsm_structured.byte_stride); + ins->declaration.tgsm_structured.zero_init = false; } static void shader_sm5_read_dcl_resource_structured(struct vkd3d_shader_instruction *ins, uint32_t opcode, @@ -1748,7 +1750,6 @@ static void shader_sm4_destroy(struct vkd3d_shader_parser *parser) struct vkd3d_shader_sm4_parser *sm4 = vkd3d_shader_sm4_parser(parser); vsir_program_cleanup(&parser->program); - free_shader_desc(&parser->shader_desc); vkd3d_free(sm4); } @@ -2504,7 +2505,7 @@ static const struct vkd3d_shader_parser_ops shader_sm4_parser_ops = }; static bool shader_sm4_init(struct vkd3d_shader_sm4_parser *sm4, const uint32_t *byte_code, - size_t byte_code_size, const char *source_name, const struct shader_signature *output_signature, + size_t byte_code_size, const char *source_name, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_version version; @@ -2648,9 +2649,9 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser) { struct vkd3d_shader_instruction_array 
*instructions; - struct vkd3d_shader_desc *shader_desc; struct vkd3d_shader_instruction *ins; struct vkd3d_shader_sm4_parser *sm4; + struct dxbc_shader_desc dxbc_desc = {0}; int ret; if (!(sm4 = vkd3d_calloc(1, sizeof(*sm4)))) @@ -2659,36 +2660,40 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi return VKD3D_ERROR_OUT_OF_MEMORY; } - shader_desc = &sm4->p.shader_desc; - shader_desc->is_dxil = false; + dxbc_desc.is_dxil = false; if ((ret = shader_extract_from_dxbc(&compile_info->source, - message_context, compile_info->source_name, shader_desc)) < 0) + message_context, compile_info->source_name, &dxbc_desc)) < 0) { WARN("Failed to extract shader, vkd3d result %d.\n", ret); vkd3d_free(sm4); return ret; } - if (!shader_sm4_init(sm4, shader_desc->byte_code, shader_desc->byte_code_size, - compile_info->source_name, &shader_desc->output_signature, message_context)) + if (!shader_sm4_init(sm4, dxbc_desc.byte_code, dxbc_desc.byte_code_size, + compile_info->source_name, message_context)) { WARN("Failed to initialise shader parser.\n"); - free_shader_desc(shader_desc); + free_dxbc_shader_desc(&dxbc_desc); vkd3d_free(sm4); return VKD3D_ERROR_INVALID_ARGUMENT; } + sm4->p.program.input_signature = dxbc_desc.input_signature; + sm4->p.program.output_signature = dxbc_desc.output_signature; + sm4->p.program.patch_constant_signature = dxbc_desc.patch_constant_signature; + memset(&dxbc_desc, 0, sizeof(dxbc_desc)); + /* DXBC stores used masks inverted for output signatures, for some reason. * We return them un-inverted. 
*/ - uninvert_used_masks(&shader_desc->output_signature); + uninvert_used_masks(&sm4->p.program.output_signature); if (sm4->p.program.shader_version.type == VKD3D_SHADER_TYPE_HULL) - uninvert_used_masks(&shader_desc->patch_constant_signature); + uninvert_used_masks(&sm4->p.program.patch_constant_signature); - if (!shader_sm4_parser_validate_signature(sm4, &shader_desc->input_signature, + if (!shader_sm4_parser_validate_signature(sm4, &sm4->p.program.input_signature, sm4->input_register_masks, "Input") - || !shader_sm4_parser_validate_signature(sm4, &shader_desc->output_signature, + || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.output_signature, sm4->output_register_masks, "Output") - || !shader_sm4_parser_validate_signature(sm4, &shader_desc->patch_constant_signature, + || !shader_sm4_parser_validate_signature(sm4, &sm4->p.program.patch_constant_signature, sm4->patch_constant_register_masks, "Patch constant")) { shader_sm4_destroy(&sm4->p); @@ -2721,7 +2726,7 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi shader_sm4_validate_default_phase_index_ranges(sm4); if (!sm4->p.failed) - vsir_validate(&sm4->p); + vkd3d_shader_parser_validate(&sm4->p); if (sm4->p.failed) { diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c index 4f400d19f6f..81ac84896d4 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_main.c @@ -23,6 +23,8 @@ #include #include +/* VKD3D_DEBUG_ENV_NAME("VKD3D_SHADER_DEBUG"); */ + static inline int char_to_int(char c) { if ('0' <= c && c <= '9') @@ -71,8 +73,16 @@ void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer) void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer) { - buffer->buffer[0] = '\0'; - buffer->content_size = 0; + vkd3d_string_buffer_truncate(buffer, 0); +} + +void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size) +{ 
+ if (size < buffer->content_size) + { + buffer->buffer[size] = '\0'; + buffer->content_size = size; + } } static bool vkd3d_string_buffer_resize(struct vkd3d_string_buffer *buffer, int rc) @@ -224,6 +234,16 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *cache, struct cache->buffers[cache->count++] = buffer; } +void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer) +{ + code->code = buffer->buffer; + code->size = buffer->content_size; + + buffer->buffer = NULL; + buffer->buffer_size = 0; + buffer->content_size = 0; +} + void vkd3d_shader_message_context_init(struct vkd3d_shader_message_context *context, enum vkd3d_shader_log_level log_level) { @@ -1438,11 +1458,11 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info if (!ret && signature_info) { - if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->shader_desc.input_signature) + if (!vkd3d_shader_signature_from_shader_signature(&signature_info->input, &parser->program.input_signature) || !vkd3d_shader_signature_from_shader_signature(&signature_info->output, - &parser->shader_desc.output_signature) + &parser->program.output_signature) || !vkd3d_shader_signature_from_shader_signature(&signature_info->patch_constant, - &parser->shader_desc.patch_constant_signature)) + &parser->program.patch_constant_signature)) { ret = VKD3D_ERROR_OUT_OF_MEMORY; } @@ -1470,60 +1490,6 @@ static int scan_with_parser(const struct vkd3d_shader_compile_info *compile_info return ret; } -static int scan_dxbc(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = scan_with_parser(compile_info, message_context, NULL, parser); - 
vkd3d_shader_parser_destroy(parser); - - return ret; -} - -static int scan_d3dbc(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -} - -static int scan_dxil(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = scan_with_parser(compile_info, message_context, NULL, parser); - vkd3d_shader_parser_destroy(parser); - - return ret; -} - int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char **messages) { struct vkd3d_shader_message_context message_context; @@ -1543,29 +1509,44 @@ int vkd3d_shader_scan(const struct vkd3d_shader_compile_info *compile_info, char vkd3d_shader_dump_shader(compile_info); - switch (compile_info->source_type) + if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { - case VKD3D_SHADER_SOURCE_DXBC_TPF: - ret = scan_dxbc(compile_info, &message_context); - break; + FIXME("HLSL support not implemented.\n"); + ret = VKD3D_ERROR_NOT_IMPLEMENTED; + } + else + { + struct vkd3d_shader_parser *parser; - case VKD3D_SHADER_SOURCE_HLSL: - FIXME("HLSL support not implemented.\n"); - ret = VKD3D_ERROR_NOT_IMPLEMENTED; - break; + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); + break; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - ret = 
scan_d3dbc(compile_info, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_TPF: + ret = vkd3d_shader_sm4_parser_create(compile_info, &message_context, &parser); + break; - case VKD3D_SHADER_SOURCE_DXBC_DXIL: - ret = scan_dxil(compile_info, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); + break; - default: - ERR("Unsupported source type %#x.\n", compile_info->source_type); - ret = VKD3D_ERROR_INVALID_ARGUMENT; - break; + default: + ERR("Unsupported source type %#x.\n", compile_info->source_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; + break; + } + + if (ret < 0) + { + WARN("Failed to create shader parser.\n"); + } + else + { + ret = scan_with_parser(compile_info, &message_context, NULL, parser); + vkd3d_shader_parser_destroy(parser); + } } vkd3d_shader_message_context_trace_messages(&message_context); @@ -1580,7 +1561,7 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { struct vkd3d_shader_scan_descriptor_info1 scan_descriptor_info; - struct vkd3d_glsl_generator *glsl_generator; + struct vsir_program *program = &parser->program; struct vkd3d_shader_compile_info scan_info; int ret; @@ -1589,22 +1570,13 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, switch (compile_info->target_type) { case VKD3D_SHADER_TARGET_D3D_ASM: - ret = vkd3d_dxbc_binary_to_text(&parser->program, compile_info, out, VSIR_ASM_D3D); + ret = d3d_asm_compile(program, compile_info, out, VSIR_ASM_FLAG_NONE); break; case VKD3D_SHADER_TARGET_GLSL: if ((ret = scan_with_parser(&scan_info, message_context, &scan_descriptor_info, parser)) < 0) return ret; - if (!(glsl_generator = vkd3d_glsl_generator_create(&parser->program.shader_version, - message_context, &parser->location))) - { - ERR("Failed to create GLSL generator.\n"); - 
vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); - return VKD3D_ERROR; - } - - ret = vkd3d_glsl_generator_generate(glsl_generator, &parser->program, out); - vkd3d_glsl_generator_destroy(glsl_generator); + ret = glsl_compile(program, out, message_context); vkd3d_shader_free_scan_descriptor_info1(&scan_descriptor_info); break; @@ -1624,24 +1596,6 @@ static int vkd3d_shader_parser_compile(struct vkd3d_shader_parser *parser, return ret; } -static int compile_dxbc_tpf(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm4_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - - vkd3d_shader_parser_destroy(parser); - return ret; -} - static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) { @@ -1657,42 +1611,6 @@ static int compile_hlsl(const struct vkd3d_shader_compile_info *compile_info, return ret; } -static int compile_d3d_bytecode(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser *parser; - int ret; - - if ((ret = vkd3d_shader_sm1_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - - vkd3d_shader_parser_destroy(parser); - return ret; -} - -static int compile_dxbc_dxil(const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context) -{ - struct vkd3d_shader_parser 
*parser; - int ret; - - if ((ret = vkd3d_shader_sm6_parser_create(compile_info, message_context, &parser)) < 0) - { - WARN("Failed to initialise shader parser.\n"); - return ret; - } - - ret = vkd3d_shader_parser_compile(parser, compile_info, out, message_context); - - vkd3d_shader_parser_destroy(parser); - return ret; -} - int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, char **messages) { @@ -1713,26 +1631,43 @@ int vkd3d_shader_compile(const struct vkd3d_shader_compile_info *compile_info, vkd3d_shader_dump_shader(compile_info); - switch (compile_info->source_type) + if (compile_info->source_type == VKD3D_SHADER_SOURCE_HLSL) { - case VKD3D_SHADER_SOURCE_DXBC_TPF: - ret = compile_dxbc_tpf(compile_info, out, &message_context); - break; + ret = compile_hlsl(compile_info, out, &message_context); + } + else + { + struct vkd3d_shader_parser *parser; - case VKD3D_SHADER_SOURCE_HLSL: - ret = compile_hlsl(compile_info, out, &message_context); - break; + switch (compile_info->source_type) + { + case VKD3D_SHADER_SOURCE_D3D_BYTECODE: + ret = vkd3d_shader_sm1_parser_create(compile_info, &message_context, &parser); + break; - case VKD3D_SHADER_SOURCE_D3D_BYTECODE: - ret = compile_d3d_bytecode(compile_info, out, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_TPF: + ret = vkd3d_shader_sm4_parser_create(compile_info, &message_context, &parser); + break; - case VKD3D_SHADER_SOURCE_DXBC_DXIL: - ret = compile_dxbc_dxil(compile_info, out, &message_context); - break; + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + ret = vkd3d_shader_sm6_parser_create(compile_info, &message_context, &parser); + break; - default: - vkd3d_unreachable(); + default: + ERR("Unsupported source type %#x.\n", compile_info->source_type); + ret = VKD3D_ERROR_INVALID_ARGUMENT; + break; + } + + if (ret < 0) + { + WARN("Failed to create shader parser.\n"); + } + else + { + ret = vkd3d_shader_parser_compile(parser, compile_info, out, 
&message_context); + vkd3d_shader_parser_destroy(parser); + } } vkd3d_shader_message_context_trace_messages(&message_context); @@ -1937,7 +1872,7 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_SPIRV_TEXT, #endif VKD3D_SHADER_TARGET_D3D_ASM, -#if 0 +#ifdef VKD3D_SHADER_UNSUPPORTED_GLSL VKD3D_SHADER_TARGET_GLSL, #endif }; @@ -1958,13 +1893,21 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( VKD3D_SHADER_TARGET_D3D_ASM, }; +#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL + static const enum vkd3d_shader_target_type dxbc_dxil_types[] = + { + VKD3D_SHADER_TARGET_SPIRV_BINARY, +# ifdef HAVE_SPIRV_TOOLS + VKD3D_SHADER_TARGET_SPIRV_TEXT, +# endif + VKD3D_SHADER_TARGET_D3D_ASM, + }; +#endif + TRACE("source_type %#x, count %p.\n", source_type, count); switch (source_type) { -#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL - case VKD3D_SHADER_SOURCE_DXBC_DXIL: -#endif case VKD3D_SHADER_SOURCE_DXBC_TPF: *count = ARRAY_SIZE(dxbc_tpf_types); return dxbc_tpf_types; @@ -1977,6 +1920,12 @@ const enum vkd3d_shader_target_type *vkd3d_shader_get_supported_target_types( *count = ARRAY_SIZE(d3dbc_types); return d3dbc_types; +#ifdef VKD3D_SHADER_UNSUPPORTED_DXIL + case VKD3D_SHADER_SOURCE_DXBC_DXIL: + *count = ARRAY_SIZE(dxbc_dxil_types); + return dxbc_dxil_types; +#endif + default: *count = 0; return NULL; diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h index 2d3b3254638..a33b6d2d967 100644 --- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h +++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h @@ -148,6 +148,8 @@ enum vkd3d_shader_error VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER = 5026, VKD3D_SHADER_ERROR_HLSL_NON_FINITE_RESULT = 5027, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_SWITCH_CASE = 5028, + VKD3D_SHADER_ERROR_HLSL_MISSING_TECHNIQUE = 5029, + VKD3D_SHADER_ERROR_HLSL_UNKNOWN_MODIFIER = 5030, VKD3D_SHADER_WARNING_HLSL_IMPLICIT_TRUNCATION = 
5300, VKD3D_SHADER_WARNING_HLSL_DIVISION_BY_ZERO = 5301, @@ -445,6 +447,7 @@ enum vkd3d_shader_opcode VKD3DSIH_NOT, VKD3DSIH_NRM, VKD3DSIH_OR, + VKD3DSIH_ORD, VKD3DSIH_PHASE, VKD3DSIH_PHI, VKD3DSIH_POW, @@ -516,6 +519,7 @@ enum vkd3d_shader_opcode VKD3DSIH_UMAX, VKD3DSIH_UMIN, VKD3DSIH_UMUL, + VKD3DSIH_UNO, VKD3DSIH_USHR, VKD3DSIH_UTOD, VKD3DSIH_UTOF, @@ -620,14 +624,16 @@ enum vkd3d_data_type VKD3D_DATA_UINT8, VKD3D_DATA_UINT64, VKD3D_DATA_BOOL, + VKD3D_DATA_UINT16, + VKD3D_DATA_HALF, VKD3D_DATA_COUNT, }; static inline bool data_type_is_integer(enum vkd3d_data_type data_type) { - return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT - || data_type == VKD3D_DATA_UINT64; + return data_type == VKD3D_DATA_INT || data_type == VKD3D_DATA_UINT8 || data_type == VKD3D_DATA_UINT16 + || data_type == VKD3D_DATA_UINT || data_type == VKD3D_DATA_UINT64; } static inline bool data_type_is_bool(enum vkd3d_data_type data_type) @@ -808,6 +814,8 @@ enum vkd3d_shader_type VKD3D_SHADER_TYPE_COUNT, }; +struct vkd3d_shader_message_context; + struct vkd3d_shader_version { enum vkd3d_shader_type type; @@ -1025,7 +1033,7 @@ struct signature_element *vsir_signature_find_element_for_reg(const struct shade unsigned int reg_idx, unsigned int write_mask); void shader_signature_cleanup(struct shader_signature *signature); -struct vkd3d_shader_desc +struct dxbc_shader_desc { const uint32_t *byte_code; size_t byte_code_size; @@ -1033,7 +1041,10 @@ struct vkd3d_shader_desc struct shader_signature input_signature; struct shader_signature output_signature; struct shader_signature patch_constant_signature; +}; +struct vkd3d_shader_desc +{ struct { uint32_t used, external; @@ -1079,14 +1090,18 @@ struct vkd3d_shader_tgsm struct vkd3d_shader_tgsm_raw { struct vkd3d_shader_dst_param reg; + unsigned int alignment; unsigned int byte_count; + bool zero_init; }; struct vkd3d_shader_tgsm_structured { struct vkd3d_shader_dst_param reg; + unsigned int alignment; 
unsigned int byte_stride; unsigned int structure_count; + bool zero_init; }; struct vkd3d_shader_thread_group_size @@ -1290,6 +1305,10 @@ struct vsir_program struct vkd3d_shader_version shader_version; struct vkd3d_shader_instruction_array instructions; + struct shader_signature input_signature; + struct shader_signature output_signature; + struct shader_signature patch_constant_signature; + unsigned int input_control_point_count, output_control_point_count; unsigned int block_count; unsigned int temp_count; @@ -1302,6 +1321,10 @@ struct vsir_program bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve); void vsir_program_cleanup(struct vsir_program *program); +enum vkd3d_result vsir_program_normalise(struct vsir_program *program, uint64_t config_flags, + const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context); +enum vkd3d_result vsir_program_validate(struct vsir_program *program, uint64_t config_flags, + const char *source_name, struct vkd3d_shader_message_context *message_context); static inline struct vkd3d_shader_dst_param *vsir_program_get_dst_params( struct vsir_program *program, unsigned int count) @@ -1347,6 +1370,12 @@ static inline void vkd3d_shader_parser_destroy(struct vkd3d_shader_parser *parse parser->ops->parser_destroy(parser); } +static inline enum vkd3d_result vkd3d_shader_parser_validate(struct vkd3d_shader_parser *parser) +{ + return vsir_program_validate(&parser->program, parser->config_flags, + parser->location.source_name, parser->message_context); +} + struct vkd3d_shader_descriptor_info1 { enum vkd3d_shader_descriptor_type type; @@ -1385,21 +1414,22 @@ struct vkd3d_string_buffer_cache size_t count, max_count, capacity; }; -enum vsir_asm_dialect +enum vsir_asm_flags { - VSIR_ASM_VSIR, - VSIR_ASM_D3D, + VSIR_ASM_FLAG_NONE = 0, + VSIR_ASM_FLAG_DUMP_TYPES = 0x1, }; -enum vkd3d_result vkd3d_dxbc_binary_to_text(const struct 
vsir_program *program, +enum vkd3d_result d3d_asm_compile(const struct vsir_program *program, const struct vkd3d_shader_compile_info *compile_info, - struct vkd3d_shader_code *out, enum vsir_asm_dialect dialect); + struct vkd3d_shader_code *out, enum vsir_asm_flags flags); void vkd3d_string_buffer_cleanup(struct vkd3d_string_buffer *buffer); struct vkd3d_string_buffer *vkd3d_string_buffer_get(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_init(struct vkd3d_string_buffer *buffer); void vkd3d_string_buffer_cache_cleanup(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_cache_init(struct vkd3d_string_buffer_cache *list); void vkd3d_string_buffer_clear(struct vkd3d_string_buffer *buffer); +void vkd3d_string_buffer_truncate(struct vkd3d_string_buffer *buffer, size_t size); int vkd3d_string_buffer_print_f32(struct vkd3d_string_buffer *buffer, float f); int vkd3d_string_buffer_print_f64(struct vkd3d_string_buffer *buffer, double d); int vkd3d_string_buffer_printf(struct vkd3d_string_buffer *buffer, const char *format, ...) 
VKD3D_PRINTF_FUNC(2, 3); @@ -1408,6 +1438,7 @@ void vkd3d_string_buffer_release(struct vkd3d_string_buffer_cache *list, struct vkd3d_string_buffer_trace_(buffer, __FUNCTION__) void vkd3d_string_buffer_trace_(const struct vkd3d_string_buffer *buffer, const char *function); int vkd3d_string_buffer_vprintf(struct vkd3d_string_buffer *buffer, const char *format, va_list args); +void vkd3d_shader_code_from_string_buffer(struct vkd3d_shader_code *code, struct vkd3d_string_buffer *buffer); struct vkd3d_bytecode_buffer { @@ -1483,20 +1514,15 @@ int vkd3d_shader_sm4_parser_create(const struct vkd3d_shader_compile_info *compi int vkd3d_shader_sm6_parser_create(const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser); -void free_shader_desc(struct vkd3d_shader_desc *desc); +void free_dxbc_shader_desc(struct dxbc_shader_desc *desc); int shader_extract_from_dxbc(const struct vkd3d_shader_code *dxbc, - struct vkd3d_shader_message_context *message_context, const char *source_name, struct vkd3d_shader_desc *desc); + struct vkd3d_shader_message_context *message_context, const char *source_name, struct dxbc_shader_desc *desc); int shader_parse_input_signature(const struct vkd3d_shader_code *dxbc, struct vkd3d_shader_message_context *message_context, struct shader_signature *signature); -struct vkd3d_glsl_generator; - -struct vkd3d_glsl_generator *vkd3d_glsl_generator_create(const struct vkd3d_shader_version *version, - struct vkd3d_shader_message_context *message_context, const struct vkd3d_shader_location *location); -int vkd3d_glsl_generator_generate(struct vkd3d_glsl_generator *generator, - struct vsir_program *program, struct vkd3d_shader_code *out); -void vkd3d_glsl_generator_destroy(struct vkd3d_glsl_generator *generator); +int glsl_compile(struct vsir_program *program, struct vkd3d_shader_code *out, + struct vkd3d_shader_message_context *message_context); #define SPIRV_MAX_SRC_COUNT 6 @@ 
-1513,17 +1539,17 @@ int preproc_lexer_parse(const struct vkd3d_shader_compile_info *compile_info, int hlsl_compile_shader(const struct vkd3d_shader_code *hlsl, const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_code *out, struct vkd3d_shader_message_context *message_context); -enum vkd3d_result vsir_validate(struct vkd3d_shader_parser *parser); - static inline enum vkd3d_shader_component_type vkd3d_component_type_from_data_type( enum vkd3d_data_type data_type) { switch (data_type) { + case VKD3D_DATA_HALF: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_FLOAT: case VKD3D_DATA_UNORM: case VKD3D_DATA_SNORM: return VKD3D_SHADER_COMPONENT_FLOAT; + case VKD3D_DATA_UINT16: /* Minimum precision. TODO: native 16-bit */ case VKD3D_DATA_UINT: return VKD3D_SHADER_COMPONENT_UINT; case VKD3D_DATA_INT: @@ -1760,7 +1786,4 @@ void dxbc_writer_add_section(struct dxbc_writer *dxbc, uint32_t tag, const void void dxbc_writer_init(struct dxbc_writer *dxbc); int dxbc_writer_write(struct dxbc_writer *dxbc, struct vkd3d_shader_code *code); -enum vkd3d_result vkd3d_shader_normalise(struct vkd3d_shader_parser *parser, - const struct vkd3d_shader_compile_info *compile_info); - #endif /* __VKD3D_SHADER_PRIVATE_H */ diff --git a/libs/vkd3d/libs/vkd3d/device.c b/libs/vkd3d/libs/vkd3d/device.c index 17c7ccb3e31..7841a811bf7 100644 --- a/libs/vkd3d/libs/vkd3d/device.c +++ b/libs/vkd3d/libs/vkd3d/device.c @@ -94,6 +94,7 @@ static const struct vkd3d_optional_extension_info optional_device_extensions[] = VK_EXTENSION(EXT_DEBUG_MARKER, EXT_debug_marker), VK_EXTENSION(EXT_DEPTH_CLIP_ENABLE, EXT_depth_clip_enable), VK_EXTENSION(EXT_DESCRIPTOR_INDEXING, EXT_descriptor_indexing), + VK_EXTENSION(EXT_FRAGMENT_SHADER_INTERLOCK, EXT_fragment_shader_interlock), VK_EXTENSION(EXT_MUTABLE_DESCRIPTOR_TYPE, EXT_mutable_descriptor_type), VK_EXTENSION(EXT_ROBUSTNESS_2, EXT_robustness2), VK_EXTENSION(EXT_SHADER_DEMOTE_TO_HELPER_INVOCATION, EXT_shader_demote_to_helper_invocation), 
@@ -789,6 +790,7 @@ struct vkd3d_physical_device_info VkPhysicalDeviceConditionalRenderingFeaturesEXT conditional_rendering_features; VkPhysicalDeviceDepthClipEnableFeaturesEXT depth_clip_features; VkPhysicalDeviceDescriptorIndexingFeaturesEXT descriptor_indexing_features; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT fragment_shader_interlock_features; VkPhysicalDeviceRobustness2FeaturesEXT robustness2_features; VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT demote_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT texel_buffer_alignment_features; @@ -808,6 +810,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *vertex_divisor_properties; VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *buffer_alignment_properties; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing_features; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; VkPhysicalDeviceRobustness2FeaturesEXT *robustness2_features; VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *vertex_divisor_features; VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; @@ -825,6 +828,7 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i conditional_rendering_features = &info->conditional_rendering_features; depth_clip_features = &info->depth_clip_features; descriptor_indexing_features = &info->descriptor_indexing_features; + fragment_shader_interlock_features = &info->fragment_shader_interlock_features; robustness2_features = &info->robustness2_features; descriptor_indexing_properties = &info->descriptor_indexing_properties; maintenance3_properties = &info->maintenance3_properties; @@ -846,6 +850,8 @@ static void vkd3d_physical_device_info_init(struct vkd3d_physical_device_info *i vk_prepend_struct(&info->features2, depth_clip_features); descriptor_indexing_features->sType = 
VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES_EXT; vk_prepend_struct(&info->features2, descriptor_indexing_features); + fragment_shader_interlock_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADER_INTERLOCK_FEATURES_EXT; + vk_prepend_struct(&info->features2, fragment_shader_interlock_features); robustness2_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT; vk_prepend_struct(&info->features2, robustness2_features); demote_features->sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT; @@ -1158,6 +1164,7 @@ static void vkd3d_trace_physical_device_limits(const struct vkd3d_physical_devic static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_device_info *info) { + const VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock_features; const VkPhysicalDeviceConditionalRenderingFeaturesEXT *conditional_rendering_features; const VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *demote_features; const VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *buffer_alignment_features; @@ -1279,6 +1286,15 @@ static void vkd3d_trace_physical_device_features(const struct vkd3d_physical_dev TRACE(" VkPhysicalDeviceDepthClipEnableFeaturesEXT:\n"); TRACE(" depthClipEnable: %#x.\n", depth_clip_features->depthClipEnable); + fragment_shader_interlock_features = &info->fragment_shader_interlock_features; + TRACE(" VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT:\n"); + TRACE(" fragmentShaderSampleInterlock: %#x.\n", + fragment_shader_interlock_features->fragmentShaderSampleInterlock); + TRACE(" fragmentShaderPixelInterlock: %#x.\n", + fragment_shader_interlock_features->fragmentShaderPixelInterlock); + TRACE(" fragmentShaderShadingRateInterlock: %#x.\n", + fragment_shader_interlock_features->fragmentShaderShadingRateInterlock); + demote_features = &info->demote_features; TRACE(" 
VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT:\n"); TRACE(" shaderDemoteToHelperInvocation: %#x.\n", demote_features->shaderDemoteToHelperInvocation); @@ -1476,6 +1492,7 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, uint32_t *device_extension_count, bool **user_extension_supported) { const struct vkd3d_vk_instance_procs *vk_procs = &device->vkd3d_instance->vk_procs; + VkPhysicalDeviceFragmentShaderInterlockFeaturesEXT *fragment_shader_interlock; const struct vkd3d_optional_device_extensions_info *optional_extensions; VkPhysicalDeviceDescriptorIndexingFeaturesEXT *descriptor_indexing; VkPhysicalDevice physical_device = device->vk_physical_device; @@ -1539,8 +1556,6 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, device->feature_options.TypedUAVLoadAdditionalFormats = features->shaderStorageImageReadWithoutFormat && d3d12_device_supports_typed_uav_load_additional_formats(device); - /* GL_INTEL_fragment_shader_ordering, no Vulkan equivalent. */ - device->feature_options.ROVsSupported = FALSE; /* GL_INTEL_conservative_rasterization, no Vulkan equivalent. 
*/ device->feature_options.ConservativeRasterizationTier = D3D12_CONSERVATIVE_RASTERIZATION_TIER_NOT_SUPPORTED; device->feature_options.MaxGPUVirtualAddressBitsPerResource = 40; /* FIXME */ @@ -1619,6 +1634,12 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, *user_extension_supported, vulkan_info, "device", device->vkd3d_instance->config_flags & VKD3D_CONFIG_FLAG_VULKAN_DEBUG); + fragment_shader_interlock = &physical_device_info->fragment_shader_interlock_features; + if (!fragment_shader_interlock->fragmentShaderSampleInterlock + || !fragment_shader_interlock->fragmentShaderPixelInterlock) + vulkan_info->EXT_fragment_shader_interlock = false; + device->feature_options.ROVsSupported = vulkan_info->EXT_fragment_shader_interlock; + if (!physical_device_info->conditional_rendering_features.conditionalRendering) vulkan_info->EXT_conditional_rendering = false; if (!physical_device_info->depth_clip_features.depthClipEnable) @@ -1675,6 +1696,10 @@ static HRESULT vkd3d_init_device_caps(struct d3d12_device *device, vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] = VKD3D_SHADER_SPIRV_EXTENSION_EXT_DESCRIPTOR_INDEXING; + if (vulkan_info->EXT_fragment_shader_interlock) + vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] + = VKD3D_SHADER_SPIRV_EXTENSION_EXT_FRAGMENT_SHADER_INTERLOCK; + if (vulkan_info->EXT_shader_stencil_export) vulkan_info->shader_extensions[vulkan_info->shader_extension_count++] = VKD3D_SHADER_SPIRV_EXTENSION_EXT_STENCIL_EXPORT; @@ -2499,17 +2524,18 @@ static void vkd3d_desc_object_cache_cleanup(struct vkd3d_desc_object_cache *cach } /* ID3D12Device */ -static inline struct d3d12_device *impl_from_ID3D12Device7(ID3D12Device7 *iface) +static inline struct d3d12_device *impl_from_ID3D12Device8(ID3D12Device8 *iface) { - return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device7_iface); + return CONTAINING_RECORD(iface, struct d3d12_device, ID3D12Device8_iface); } -static HRESULT 
STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device8 *iface, REFIID riid, void **object) { TRACE("iface %p, riid %s, object %p.\n", iface, debugstr_guid(riid), object); - if (IsEqualGUID(riid, &IID_ID3D12Device7) + if (IsEqualGUID(riid, &IID_ID3D12Device8) + || IsEqualGUID(riid, &IID_ID3D12Device7) || IsEqualGUID(riid, &IID_ID3D12Device6) || IsEqualGUID(riid, &IID_ID3D12Device5) || IsEqualGUID(riid, &IID_ID3D12Device4) @@ -2531,9 +2557,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_QueryInterface(ID3D12Device7 *ifac return E_NOINTERFACE; } -static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device7 *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_AddRef(ID3D12Device8 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); unsigned int refcount = vkd3d_atomic_increment_u32(&device->refcount); TRACE("%p increasing refcount to %u.\n", device, refcount); @@ -2563,9 +2589,9 @@ static HRESULT device_worker_stop(struct d3d12_device *device) return S_OK; } -static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) +static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device8 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); unsigned int refcount = vkd3d_atomic_decrement_u32(&device->refcount); TRACE("%p decreasing refcount to %u.\n", device, refcount); @@ -2602,10 +2628,10 @@ static ULONG STDMETHODCALLTYPE d3d12_device_Release(ID3D12Device7 *iface) return refcount; } -static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device8 *iface, REFGUID guid, UINT *data_size, void *data) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = 
impl_from_ID3D12Device8(iface); TRACE("iface %p, guid %s, data_size %p, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2613,10 +2639,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_GetPrivateData(ID3D12Device7 *ifac return vkd3d_get_private_data(&device->private_store, guid, data_size, data); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device8 *iface, REFGUID guid, UINT data_size, const void *data) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, guid %s, data_size %u, data %p.\n", iface, debugstr_guid(guid), data_size, data); @@ -2624,19 +2650,19 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateData(ID3D12Device7 *ifac return vkd3d_set_private_data(&device->private_store, guid, data_size, data); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetPrivateDataInterface(ID3D12Device8 *iface, REFGUID guid, const IUnknown *data) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, guid %s, data %p.\n", iface, debugstr_guid(guid), data); return vkd3d_set_private_data_interface(&device->private_store, guid, data); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, const WCHAR *name) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device8 *iface, const WCHAR *name) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, name %s.\n", iface, debugstr_w(name, device->wchar_size)); @@ -2644,17 +2670,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetName(ID3D12Device7 *iface, cons VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT, 
name); } -static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device7 *iface) +static UINT STDMETHODCALLTYPE d3d12_device_GetNodeCount(ID3D12Device8 *iface) { TRACE("iface %p.\n", iface); return 1; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device8 *iface, const D3D12_COMMAND_QUEUE_DESC *desc, REFIID riid, void **command_queue) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_command_queue *object; HRESULT hr; @@ -2668,10 +2694,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandQueue(ID3D12Device7 * riid, command_queue); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Device8 *iface, D3D12_COMMAND_LIST_TYPE type, REFIID riid, void **command_allocator) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_command_allocator *object; HRESULT hr; @@ -2685,10 +2711,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandAllocator(ID3D12Devic riid, command_allocator); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12Device8 *iface, const D3D12_GRAPHICS_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_pipeline_state *object; HRESULT hr; @@ -2702,10 +2728,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateGraphicsPipelineState(ID3D12 &IID_ID3D12PipelineState, riid, pipeline_state); } -static HRESULT STDMETHODCALLTYPE 
d3d12_device_CreateComputePipelineState(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12Device8 *iface, const D3D12_COMPUTE_PIPELINE_STATE_DESC *desc, REFIID riid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_pipeline_state *object; HRESULT hr; @@ -2719,11 +2745,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateComputePipelineState(ID3D12D &IID_ID3D12PipelineState, riid, pipeline_state); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList(ID3D12Device8 *iface, UINT node_mask, D3D12_COMMAND_LIST_TYPE type, ID3D12CommandAllocator *command_allocator, ID3D12PipelineState *initial_pipeline_state, REFIID riid, void **command_list) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_command_list *object; HRESULT hr; @@ -2846,10 +2872,10 @@ bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent) return true; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device8 *iface, D3D12_FEATURE feature, void *feature_data, UINT feature_data_size) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, feature %#x, feature_data %p, feature_data_size %u.\n", iface, feature, feature_data, feature_data_size); @@ -3521,10 +3547,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CheckFeatureSupport(ID3D12Device7 } } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device8 *iface, const 
D3D12_DESCRIPTOR_HEAP_DESC *desc, REFIID riid, void **descriptor_heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_descriptor_heap *object; HRESULT hr; @@ -3538,7 +3564,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateDescriptorHeap(ID3D12Device7 &IID_ID3D12DescriptorHeap, riid, descriptor_heap); } -static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device7 *iface, +static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D12Device8 *iface, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { TRACE("iface %p, descriptor_heap_type %#x.\n", iface, descriptor_heap_type); @@ -3561,11 +3587,11 @@ static UINT STDMETHODCALLTYPE d3d12_device_GetDescriptorHandleIncrementSize(ID3D } } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device8 *iface, UINT node_mask, const void *bytecode, SIZE_T bytecode_length, REFIID riid, void **root_signature) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_root_signature *object; HRESULT hr; @@ -3581,10 +3607,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateRootSignature(ID3D12Device7 &IID_ID3D12RootSignature, riid, root_signature); } -static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device8 *iface, const D3D12_CONSTANT_BUFFER_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); @@ -3593,11 +3619,11 @@ static 
void STDMETHODCALLTYPE d3d12_device_CreateConstantBufferView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device8 *iface, ID3D12Resource *resource, const D3D12_SHADER_RESOURCE_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, resource %p, desc %p, descriptor %s.\n", @@ -3607,11 +3633,11 @@ static void STDMETHODCALLTYPE d3d12_device_CreateShaderResourceView(ID3D12Device d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Device8 *iface, ID3D12Resource *resource, ID3D12Resource *counter_resource, const D3D12_UNORDERED_ACCESS_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, resource %p, counter_resource %p, desc %p, descriptor %s.\n", @@ -3622,7 +3648,7 @@ static void STDMETHODCALLTYPE d3d12_device_CreateUnorderedAccessView(ID3D12Devic d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device8 *iface, ID3D12Resource *resource, const D3D12_RENDER_TARGET_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3630,10 +3656,10 @@ static void STDMETHODCALLTYPE d3d12_device_CreateRenderTargetView(ID3D12Device7 
iface, resource, desc, debug_cpu_handle(descriptor)); d3d12_rtv_desc_create_rtv(d3d12_rtv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device8(iface), unsafe_impl_from_ID3D12Resource(resource), desc); } -static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device8 *iface, ID3D12Resource *resource, const D3D12_DEPTH_STENCIL_VIEW_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { @@ -3641,13 +3667,13 @@ static void STDMETHODCALLTYPE d3d12_device_CreateDepthStencilView(ID3D12Device7 iface, resource, desc, debug_cpu_handle(descriptor)); d3d12_dsv_desc_create_dsv(d3d12_dsv_desc_from_cpu_handle(descriptor), - impl_from_ID3D12Device7(iface), unsafe_impl_from_ID3D12Resource(resource), desc); + impl_from_ID3D12Device8(iface), unsafe_impl_from_ID3D12Resource(resource), desc); } -static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device8 *iface, const D3D12_SAMPLER_DESC *desc, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_desc tmp = {0}; TRACE("iface %p, desc %p, descriptor %s.\n", iface, desc, debug_cpu_handle(descriptor)); @@ -3656,14 +3682,14 @@ static void STDMETHODCALLTYPE d3d12_device_CreateSampler(ID3D12Device7 *iface, d3d12_desc_write_atomic(d3d12_desc_from_cpu_handle(descriptor), &tmp, device); } -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device8 *iface, UINT dst_descriptor_range_count, const D3D12_CPU_DESCRIPTOR_HANDLE *dst_descriptor_range_offsets, const UINT *dst_descriptor_range_sizes, UINT src_descriptor_range_count, const 
D3D12_CPU_DESCRIPTOR_HANDLE *src_descriptor_range_offsets, const UINT *src_descriptor_range_sizes, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); unsigned int dst_range_idx, dst_idx, src_range_idx, src_idx; unsigned int dst_range_size, src_range_size; struct d3d12_descriptor_heap *dst_heap; @@ -3719,7 +3745,7 @@ static void STDMETHODCALLTYPE d3d12_device_CopyDescriptors(ID3D12Device7 *iface, } } -static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_CopyDescriptorsSimple(ID3D12Device8 *iface, UINT descriptor_count, const D3D12_CPU_DESCRIPTOR_HANDLE dst_descriptor_range_offset, const D3D12_CPU_DESCRIPTOR_HANDLE src_descriptor_range_offset, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_heap_type) @@ -3850,10 +3876,10 @@ static void d3d12_device_get_resource_allocation_info(struct d3d12_device *devic } static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo( - ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + ID3D12Device8 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, const D3D12_RESOURCE_DESC *resource_descs) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p.\n", iface, info, visible_mask, count, resource_descs); @@ -3865,10 +3891,10 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour return info; } -static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device7 *iface, +static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapProperties(ID3D12Device8 *iface, D3D12_HEAP_PROPERTIES *heap_properties, UINT node_mask, D3D12_HEAP_TYPE 
heap_type) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); bool coherent; TRACE("iface %p, heap_properties %p, node_mask 0x%08x, heap_type %#x.\n", @@ -3908,12 +3934,12 @@ static D3D12_HEAP_PROPERTIES * STDMETHODCALLTYPE d3d12_device_GetCustomHeapPrope return heap_properties; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Device8 *iface, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; @@ -3935,10 +3961,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource(ID3D12Devi return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device8 *iface, const D3D12_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_heap *object; HRESULT hr; @@ -3954,12 +3980,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap(ID3D12Device7 *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device8 *iface, ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC *desc, 
D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_heap *heap_object; struct d3d12_resource *object; @@ -3980,11 +4006,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource(ID3D12Device7 return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Device8 *iface, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, REFIID iid, void **resource) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; @@ -4001,11 +4027,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource(ID3D12Devic return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device8 *iface, ID3D12DeviceChild *object, const SECURITY_ATTRIBUTES *attributes, DWORD access, const WCHAR *name, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); FIXME("iface %p, object %p, attributes %p, access %#x, name %s, handle %p stub!\n", iface, object, attributes, (uint32_t)access, debugstr_w(name, device->wchar_size), handle); @@ -4013,7 +4039,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateSharedHandle(ID3D12Device7 * return 
E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device8 *iface, HANDLE handle, REFIID riid, void **object) { FIXME("iface %p, handle %p, riid %s, object %p stub!\n", @@ -4022,10 +4048,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandle(ID3D12Device7 *if return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Device8 *iface, const WCHAR *name, DWORD access, HANDLE *handle) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); FIXME("iface %p, name %s, access %#x, handle %p stub!\n", iface, debugstr_w(name, device->wchar_size), (uint32_t)access, handle); @@ -4033,7 +4059,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenSharedHandleByName(ID3D12Devic return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device8 *iface, UINT object_count, ID3D12Pageable * const *objects) { ID3D12Fence *fence; @@ -4041,17 +4067,17 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_MakeResident(ID3D12Device7 *iface, TRACE("iface %p, object_count %u, objects %p.\n", iface, object_count, objects); - if (FAILED(hr = ID3D12Device7_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) + if (FAILED(hr = ID3D12Device8_CreateFence(iface, 0, 0, &IID_ID3D12Fence, (void **)&fence))) return hr; - hr = ID3D12Device7_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); + hr = ID3D12Device8_EnqueueMakeResident(iface, 0, object_count, objects, fence, 1); if (SUCCEEDED(hr)) ID3D12Fence_SetEventOnCompletion(fence, 1, NULL); ID3D12Fence_Release(fence); return hr; } -static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 
*iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device8 *iface, UINT object_count, ID3D12Pageable * const *objects) { FIXME_ONCE("iface %p, object_count %u, objects %p stub!\n", @@ -4060,10 +4086,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_Evict(ID3D12Device7 *iface, return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device8 *iface, UINT64 initial_value, D3D12_FENCE_FLAGS flags, REFIID riid, void **fence) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_fence *object; HRESULT hr; @@ -4076,9 +4102,9 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateFence(ID3D12Device7 *iface, return return_interface(&object->ID3D12Fence1_iface, &IID_ID3D12Fence1, riid, fence); } -static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device7 *iface) +static HRESULT STDMETHODCALLTYPE d3d12_device_GetDeviceRemovedReason(ID3D12Device8 *iface) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p.\n", iface); @@ -4163,12 +4189,12 @@ static void d3d12_device_get_copyable_footprints(struct d3d12_device *device, *total_bytes = total; } -static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device8 *iface, const D3D12_RESOURCE_DESC *desc, UINT first_sub_resource, UINT sub_resource_count, UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, UINT64 *row_sizes, UINT64 *total_bytes) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); D3D12_RESOURCE_DESC1 resource_desc; TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, 
base_offset %#"PRIx64", " @@ -4182,10 +4208,10 @@ static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints(ID3D12Device7 * base_offset, layouts, row_counts, row_sizes, total_bytes); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device8 *iface, const D3D12_QUERY_HEAP_DESC *desc, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_query_heap *object; HRESULT hr; @@ -4198,18 +4224,18 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateQueryHeap(ID3D12Device7 *ifa return return_interface(&object->ID3D12QueryHeap_iface, &IID_ID3D12QueryHeap, iid, heap); } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device7 *iface, BOOL enable) +static HRESULT STDMETHODCALLTYPE d3d12_device_SetStablePowerState(ID3D12Device8 *iface, BOOL enable) { FIXME("iface %p, enable %#x stub!\n", iface, enable); return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Device8 *iface, const D3D12_COMMAND_SIGNATURE_DESC *desc, ID3D12RootSignature *root_signature, REFIID iid, void **command_signature) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_command_signature *object; HRESULT hr; @@ -4223,14 +4249,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandSignature(ID3D12Devic &IID_ID3D12CommandSignature, iid, command_signature); } -static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device8 *iface, ID3D12Resource *resource, UINT *total_tile_count, D3D12_PACKED_MIP_INFO *packed_mip_info, D3D12_TILE_SHAPE 
*standard_tile_shape, UINT *sub_resource_tiling_count, UINT first_sub_resource_tiling, D3D12_SUBRESOURCE_TILING *sub_resource_tilings) { const struct d3d12_resource *resource_impl = impl_from_ID3D12Resource(resource); - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, resource %p, total_tile_count %p, packed_mip_info %p, " "standard_title_shape %p, sub_resource_tiling_count %p, " @@ -4243,9 +4269,9 @@ static void STDMETHODCALLTYPE d3d12_device_GetResourceTiling(ID3D12Device7 *ifac sub_resource_tiling_count, first_sub_resource_tiling, sub_resource_tilings); } -static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface, LUID *luid) +static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device8 *iface, LUID *luid) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, luid %p.\n", iface, luid); @@ -4254,7 +4280,7 @@ static LUID * STDMETHODCALLTYPE d3d12_device_GetAdapterLuid(ID3D12Device7 *iface return luid; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device8 *iface, const void *blob, SIZE_T blob_size, REFIID iid, void **lib) { FIXME("iface %p, blob %p, blob_size %"PRIuPTR", iid %s, lib %p stub!\n", @@ -4263,7 +4289,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineLibrary(ID3D12Device return DXGI_ERROR_UNSUPPORTED; } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetEventOnMultipleFenceCompletion(ID3D12Device8 *iface, ID3D12Fence *const *fences, const UINT64 *values, UINT fence_count, D3D12_MULTIPLE_FENCE_WAIT_FLAGS flags, HANDLE event) { @@ -4273,7 +4299,7 @@ static HRESULT STDMETHODCALLTYPE 
d3d12_device_SetEventOnMultipleFenceCompletion( return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device8 *iface, UINT object_count, ID3D12Pageable *const *objects, const D3D12_RESIDENCY_PRIORITY *priorities) { FIXME_ONCE("iface %p, object_count %u, objects %p, priorities %p stub!\n", iface, object_count, objects, priorities); @@ -4281,10 +4307,10 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetResidencyPriority(ID3D12Device7 return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device8 *iface, const D3D12_PIPELINE_STATE_STREAM_DESC *desc, REFIID iid, void **pipeline_state) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_pipeline_state *object; HRESULT hr; @@ -4296,7 +4322,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePipelineState(ID3D12Device7 return return_interface(&object->ID3D12PipelineState_iface, &IID_ID3D12PipelineState, iid, pipeline_state); } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12Device8 *iface, const void *address, REFIID iid, void **heap) { FIXME("iface %p, address %p, iid %s, heap %p stub!\n", iface, address, debugstr_guid(iid), heap); @@ -4304,7 +4330,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromAddress(ID3D12 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID3D12Device8 *iface, HANDLE file_mapping, REFIID iid, void **heap) { FIXME("iface %p, file_mapping %p, iid %s, heap %p 
stub!\n", iface, file_mapping, debugstr_guid(iid), heap); @@ -4312,7 +4338,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_OpenExistingHeapFromFileMapping(ID return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device8 *iface, D3D12_RESIDENCY_FLAGS flags, UINT num_objects, ID3D12Pageable *const *objects, ID3D12Fence *fence, UINT64 fence_value) { @@ -4323,7 +4349,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnqueueMakeResident(ID3D12Device7 return S_OK; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device8 *iface, UINT node_mask, D3D12_COMMAND_LIST_TYPE type, D3D12_COMMAND_LIST_FLAGS flags, REFIID iid, void **command_list) { @@ -4333,7 +4359,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommandList1(ID3D12Device7 * return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3D12Device8 *iface, const D3D12_PROTECTED_RESOURCE_SESSION_DESC *desc, REFIID iid, void **session) { FIXME("iface %p, desc %p, iid %s, session %p stub!\n", iface, desc, debugstr_guid(iid), session); @@ -4341,13 +4367,13 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession(ID3 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Device8 *iface, const D3D12_HEAP_PROPERTIES *heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) { - struct d3d12_device 
*device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); D3D12_RESOURCE_DESC1 resource_desc; struct d3d12_resource *object; HRESULT hr; @@ -4369,11 +4395,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource1(ID3D12Dev return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device8 *iface, const D3D12_HEAP_DESC *desc, ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **heap) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); struct d3d12_heap *object; HRESULT hr; @@ -4389,7 +4415,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateHeap1(ID3D12Device7 *iface, return return_interface(&object->ID3D12Heap_iface, &IID_ID3D12Heap, iid, heap); } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Device8 *iface, const D3D12_RESOURCE_DESC *desc, D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) @@ -4403,11 +4429,11 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateReservedResource1(ID3D12Devi } static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo1( - ID3D12Device7 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, + ID3D12Device8 *iface, D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, const D3D12_RESOURCE_DESC *resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) { - struct d3d12_device *device = impl_from_ID3D12Device7(iface); + struct d3d12_device *device = impl_from_ID3D12Device8(iface); TRACE("iface %p, 
info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", iface, info, visible_mask, count, resource_descs, info1); @@ -4419,7 +4445,7 @@ static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResour return info; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device8 *iface, ID3D12LifetimeOwner *owner, REFIID iid, void **tracker) { FIXME("iface %p, owner %p, iid %s, tracker %p stub!\n", iface, owner, debugstr_guid(iid), tracker); @@ -4427,12 +4453,12 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateLifetimeTracker(ID3D12Device return E_NOTIMPL; } -static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device7 *iface) +static void STDMETHODCALLTYPE d3d12_device_RemoveDevice(ID3D12Device8 *iface) { FIXME("iface %p stub!\n", iface); } -static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device8 *iface, UINT *num_meta_commands, D3D12_META_COMMAND_DESC *command_desc) { FIXME("iface %p, num_meta_commands %p, command_desc %p stub!\n", iface, @@ -4441,7 +4467,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommands(ID3D12Device return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3D12Device8 *iface, REFGUID command_id, D3D12_META_COMMAND_PARAMETER_STAGE stage, UINT *size_in_bytes, UINT *parameter_count, D3D12_META_COMMAND_PARAMETER_DESC *parameter_desc) @@ -4453,7 +4479,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_EnumerateMetaCommandParameters(ID3 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device8 
*iface, REFGUID command_id, UINT node_mask, const void *parameters_data, SIZE_T data_size_in_bytes, REFIID iid, void **meta_command) { @@ -4465,7 +4491,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateMetaCommand(ID3D12Device7 *i return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device8 *iface, const D3D12_STATE_OBJECT_DESC *desc, REFIID iid, void **state_object) { FIXME("iface %p, desc %p, iid %s, state_object %p stub!\n", iface, desc, debugstr_guid(iid), state_object); @@ -4473,14 +4499,14 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateStateObject(ID3D12Device7 *i return E_NOTIMPL; } -static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device7 *iface, +static void STDMETHODCALLTYPE d3d12_device_GetRaytracingAccelerationStructurePrebuildInfo(ID3D12Device8 *iface, const D3D12_BUILD_RAYTRACING_ACCELERATION_STRUCTURE_INPUTS *desc, D3D12_RAYTRACING_ACCELERATION_STRUCTURE_PREBUILD_INFO *info) { FIXME("iface %p, desc %p, info %p stub!\n", iface, desc, info); } -static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device7 *iface, +static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_CheckDriverMatchingIdentifier(ID3D12Device8 *iface, D3D12_SERIALIZED_DATA_TYPE data_type, const D3D12_SERIALIZED_DATA_DRIVER_MATCHING_IDENTIFIER *identifier) { FIXME("iface %p, data_type %u, identifier %p stub!\n", iface, data_type, identifier); @@ -4488,7 +4514,7 @@ static D3D12_DRIVER_MATCHING_IDENTIFIER_STATUS STDMETHODCALLTYPE d3d12_device_Ch return D3D12_DRIVER_MATCHING_IDENTIFIER_UNRECOGNIZED; } -static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12Device8 *iface, D3D12_BACKGROUND_PROCESSING_MODE 
mode, D3D12_MEASUREMENTS_ACTION action, HANDLE event, BOOL *further_measurements_desired) { @@ -4498,7 +4524,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_SetBackgroundProcessingMode(ID3D12 return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device8 *iface, const D3D12_STATE_OBJECT_DESC *addition, ID3D12StateObject *state_object_to_grow_from, REFIID riid, void **new_state_object) { @@ -4508,7 +4534,7 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_AddToStateObject(ID3D12Device7 *if return E_NOTIMPL; } -static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device7 *iface, +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID3D12Device8 *iface, const D3D12_PROTECTED_RESOURCE_SESSION_DESC1 *desc, REFIID riid, void **session) { FIXME("iface %p, desc %p, riid %s, session %p stub!\n", iface, desc, debugstr_guid(riid), session); @@ -4516,7 +4542,94 @@ static HRESULT STDMETHODCALLTYPE d3d12_device_CreateProtectedResourceSession1(ID return E_NOTIMPL; } -static const struct ID3D12Device7Vtbl d3d12_device_vtbl = +static D3D12_RESOURCE_ALLOCATION_INFO * STDMETHODCALLTYPE d3d12_device_GetResourceAllocationInfo2(ID3D12Device8 *iface, + D3D12_RESOURCE_ALLOCATION_INFO *info, UINT visible_mask, UINT count, + const D3D12_RESOURCE_DESC1 *resource_descs, D3D12_RESOURCE_ALLOCATION_INFO1 *info1) +{ + struct d3d12_device *device = impl_from_ID3D12Device8(iface); + + TRACE("iface %p, info %p, visible_mask 0x%08x, count %u, resource_descs %p, info1 %p.\n", + iface, info, visible_mask, count, resource_descs, info1); + + debug_ignored_node_mask(visible_mask); + + d3d12_device_get_resource1_allocation_info(device, info1, count, resource_descs, info); + + return info; +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreateCommittedResource2(ID3D12Device8 *iface, + const D3D12_HEAP_PROPERTIES 
*heap_properties, D3D12_HEAP_FLAGS heap_flags, const D3D12_RESOURCE_DESC1 *desc, + D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, + ID3D12ProtectedResourceSession *protected_session, REFIID iid, void **resource) +{ + struct d3d12_device *device = impl_from_ID3D12Device8(iface); + struct d3d12_resource *object; + HRESULT hr; + + TRACE("iface %p, heap_properties %p, heap_flags %#x, desc %p, initial_state %#x, " + "optimized_clear_value %p, protected_session %p, iid %s, resource %p.\n", + iface, heap_properties, heap_flags, desc, initial_state, + optimized_clear_value, protected_session, debugstr_guid(iid), resource); + + if (FAILED(hr = d3d12_committed_resource_create(device, heap_properties, heap_flags, + desc, initial_state, optimized_clear_value, protected_session, &object))) + { + *resource = NULL; + return hr; + } + + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); +} + +static HRESULT STDMETHODCALLTYPE d3d12_device_CreatePlacedResource1(ID3D12Device8 *iface, + ID3D12Heap *heap, UINT64 heap_offset, const D3D12_RESOURCE_DESC1 *resource_desc, + D3D12_RESOURCE_STATES initial_state, const D3D12_CLEAR_VALUE *optimized_clear_value, + REFIID iid, void **resource) +{ + struct d3d12_device *device = impl_from_ID3D12Device8(iface); + struct d3d12_heap *heap_object; + struct d3d12_resource *object; + HRESULT hr; + + TRACE("iface %p, heap %p, heap_offset %#"PRIx64", desc %p, initial_state %#x, " + "optimized_clear_value %p, iid %s, resource %p.\n", + iface, heap, heap_offset, resource_desc, initial_state, + optimized_clear_value, debugstr_guid(iid), resource); + + heap_object = unsafe_impl_from_ID3D12Heap(heap); + + if (FAILED(hr = d3d12_placed_resource_create(device, heap_object, heap_offset, + resource_desc, initial_state, optimized_clear_value, &object))) + return hr; + + return return_interface(&object->ID3D12Resource2_iface, &IID_ID3D12Resource2, iid, resource); +} + +static void 
STDMETHODCALLTYPE d3d12_device_CreateSamplerFeedbackUnorderedAccessView(ID3D12Device8 *iface, + ID3D12Resource *target_resource, ID3D12Resource *feedback_resource, D3D12_CPU_DESCRIPTOR_HANDLE descriptor) +{ + FIXME("iface %p, target_resource %p, feedback_resource %p, descriptor %s stub!\n", + iface, target_resource, feedback_resource, debug_cpu_handle(descriptor)); +} + +static void STDMETHODCALLTYPE d3d12_device_GetCopyableFootprints1(ID3D12Device8 *iface, + const D3D12_RESOURCE_DESC1 *desc, UINT first_sub_resource, UINT sub_resource_count, + UINT64 base_offset, D3D12_PLACED_SUBRESOURCE_FOOTPRINT *layouts, UINT *row_counts, + UINT64 *row_sizes, UINT64 *total_bytes) +{ + struct d3d12_device *device = impl_from_ID3D12Device8(iface); + + TRACE("iface %p, desc %p, first_sub_resource %u, sub_resource_count %u, base_offset %#"PRIx64", " + "layouts %p, row_counts %p, row_sizes %p, total_bytes %p.\n", + iface, desc, first_sub_resource, sub_resource_count, base_offset, + layouts, row_counts, row_sizes, total_bytes); + + d3d12_device_get_copyable_footprints(device, desc, first_sub_resource, sub_resource_count, + base_offset, layouts, row_counts, row_sizes, total_bytes); +} + +static const struct ID3D12Device8Vtbl d3d12_device_vtbl = { /* IUnknown methods */ d3d12_device_QueryInterface, @@ -4596,14 +4709,20 @@ static const struct ID3D12Device7Vtbl d3d12_device_vtbl = /* ID3D12Device7 methods */ d3d12_device_AddToStateObject, d3d12_device_CreateProtectedResourceSession1, + /* ID3D12Device8 methods */ + d3d12_device_GetResourceAllocationInfo2, + d3d12_device_CreateCommittedResource2, + d3d12_device_CreatePlacedResource1, + d3d12_device_CreateSamplerFeedbackUnorderedAccessView, + d3d12_device_GetCopyableFootprints1, }; -struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface) +struct d3d12_device *unsafe_impl_from_ID3D12Device8(ID3D12Device8 *iface) { if (!iface) return NULL; assert(iface->lpVtbl == &d3d12_device_vtbl); - return impl_from_ID3D12Device7(iface); 
+ return impl_from_ID3D12Device8(iface); } static void *device_worker_main(void *arg) @@ -4646,7 +4765,7 @@ static HRESULT d3d12_device_init(struct d3d12_device *device, const struct vkd3d_vk_device_procs *vk_procs; HRESULT hr; - device->ID3D12Device7_iface.lpVtbl = &d3d12_device_vtbl; + device->ID3D12Device8_iface.lpVtbl = &d3d12_device_vtbl; device->refcount = 1; vkd3d_instance_incref(device->vkd3d_instance = instance); @@ -4894,28 +5013,28 @@ HRESULT vkd3d_join_thread(struct vkd3d_instance *instance, union vkd3d_thread_ha IUnknown *vkd3d_get_device_parent(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); return d3d12_device->parent; } VkDevice vkd3d_get_vk_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); return d3d12_device->vk_device; } VkPhysicalDevice vkd3d_get_vk_physical_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); return d3d12_device->vk_physical_device; } struct vkd3d_instance *vkd3d_instance_from_device(ID3D12Device *device) { - struct d3d12_device *d3d12_device = impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = impl_from_ID3D12Device8((ID3D12Device8 *)device); return d3d12_device->vkd3d_instance; } diff --git a/libs/vkd3d/libs/vkd3d/resource.c b/libs/vkd3d/libs/vkd3d/resource.c index 89764d0901d..446ef3ab0db 100644 --- a/libs/vkd3d/libs/vkd3d/resource.c +++ b/libs/vkd3d/libs/vkd3d/resource.c @@ -1857,6 +1857,7 @@ static bool d3d12_resource_validate_texture_alignment(const D3D12_RESOURCE_DESC1 HRESULT d3d12_resource_validate_desc(const 
D3D12_RESOURCE_DESC1 *desc, struct d3d12_device *device) { + const D3D12_MIP_REGION *mip_region = &desc->SamplerFeedbackMipRegion; const struct vkd3d_format *format; switch (desc->Dimension) @@ -1926,6 +1927,12 @@ HRESULT d3d12_resource_validate_desc(const D3D12_RESOURCE_DESC1 *desc, struct d3 d3d12_validate_resource_flags(desc->Flags); + if (mip_region->Width && mip_region->Height && mip_region->Depth) + { + FIXME("Unhandled sampler feedback mip region size (%u, %u, %u).\n", mip_region->Width, mip_region->Height, + mip_region->Depth); + } + return S_OK; } @@ -2253,7 +2260,7 @@ HRESULT d3d12_reserved_resource_create(struct d3d12_device *device, HRESULT vkd3d_create_image_resource(ID3D12Device *device, const struct vkd3d_image_resource_create_info *create_info, ID3D12Resource **resource) { - struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device7((ID3D12Device7 *)device); + struct d3d12_device *d3d12_device = unsafe_impl_from_ID3D12Device8((ID3D12Device8 *)device); struct d3d12_resource *object; HRESULT hr; diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_main.c b/libs/vkd3d/libs/vkd3d/vkd3d_main.c index 7919b7d8760..f6925d47bdf 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_main.c +++ b/libs/vkd3d/libs/vkd3d/vkd3d_main.c @@ -71,11 +71,11 @@ HRESULT vkd3d_create_device(const struct vkd3d_device_create_info *create_info, if (!device) { - ID3D12Device_Release(&object->ID3D12Device7_iface); + ID3D12Device_Release(&object->ID3D12Device8_iface); return S_FALSE; } - return return_interface(&object->ID3D12Device7_iface, &IID_ID3D12Device, iid, device); + return return_interface(&object->ID3D12Device8_iface, &IID_ID3D12Device, iid, device); } /* ID3D12RootSignatureDeserializer */ diff --git a/libs/vkd3d/libs/vkd3d/vkd3d_private.h b/libs/vkd3d/libs/vkd3d/vkd3d_private.h index b092bb26ded..39d892a6fa7 100644 --- a/libs/vkd3d/libs/vkd3d/vkd3d_private.h +++ b/libs/vkd3d/libs/vkd3d/vkd3d_private.h @@ -55,7 +55,7 @@ #define VKD3D_MAX_COMPATIBLE_FORMAT_COUNT 6u #define 
VKD3D_MAX_QUEUE_FAMILY_COUNT 3u -#define VKD3D_MAX_SHADER_EXTENSIONS 4u +#define VKD3D_MAX_SHADER_EXTENSIONS 5u #define VKD3D_MAX_SHADER_STAGES 5u #define VKD3D_MAX_VK_SYNC_OBJECTS 4u #define VKD3D_MAX_DEVICE_BLOCKED_QUEUES 16u @@ -133,6 +133,7 @@ struct vkd3d_vulkan_info bool EXT_debug_marker; bool EXT_depth_clip_enable; bool EXT_descriptor_indexing; + bool EXT_fragment_shader_interlock; bool EXT_mutable_descriptor_type; bool EXT_robustness2; bool EXT_shader_demote_to_helper_invocation; @@ -202,36 +203,11 @@ union vkd3d_thread_handle void *handle; }; -struct vkd3d_mutex -{ - CRITICAL_SECTION lock; -}; - struct vkd3d_cond { CONDITION_VARIABLE cond; }; -static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -{ - InitializeCriticalSection(&lock->lock); -} - -static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) -{ - EnterCriticalSection(&lock->lock); -} - -static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) -{ - LeaveCriticalSection(&lock->lock); -} - -static inline void vkd3d_mutex_destroy(struct vkd3d_mutex *lock) -{ - DeleteCriticalSection(&lock->lock); -} - static inline void vkd3d_cond_init(struct vkd3d_cond *cond) { InitializeConditionVariable(&cond->cond); @@ -287,53 +263,11 @@ union vkd3d_thread_handle void *handle; }; -struct vkd3d_mutex -{ - pthread_mutex_t lock; -}; - struct vkd3d_cond { pthread_cond_t cond; }; - -static inline void vkd3d_mutex_init(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_init(&lock->lock, NULL); - if (ret) - ERR("Could not initialize the mutex, error %d.\n", ret); -} - -static inline void vkd3d_mutex_lock(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_lock(&lock->lock); - if (ret) - ERR("Could not lock the mutex, error %d.\n", ret); -} - -static inline void vkd3d_mutex_unlock(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_unlock(&lock->lock); - if (ret) - ERR("Could not unlock the mutex, error %d.\n", ret); -} - -static inline void 
vkd3d_mutex_destroy(struct vkd3d_mutex *lock) -{ - int ret; - - ret = pthread_mutex_destroy(&lock->lock); - if (ret) - ERR("Could not destroy the mutex, error %d.\n", ret); -} - static inline void vkd3d_cond_init(struct vkd3d_cond *cond) { int ret; @@ -1735,7 +1669,7 @@ struct vkd3d_desc_object_cache /* ID3D12Device */ struct d3d12_device { - ID3D12Device7 ID3D12Device7_iface; + ID3D12Device8 ID3D12Device8_iface; unsigned int refcount; VkDevice vk_device; @@ -1810,29 +1744,29 @@ struct vkd3d_queue *d3d12_device_get_vkd3d_queue(struct d3d12_device *device, D3 bool d3d12_device_is_uma(struct d3d12_device *device, bool *coherent); void d3d12_device_mark_as_removed(struct d3d12_device *device, HRESULT reason, const char *message, ...) VKD3D_PRINTF_FUNC(3, 4); -struct d3d12_device *unsafe_impl_from_ID3D12Device7(ID3D12Device7 *iface); +struct d3d12_device *unsafe_impl_from_ID3D12Device8(ID3D12Device8 *iface); HRESULT d3d12_device_add_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); void d3d12_device_remove_descriptor_heap(struct d3d12_device *device, struct d3d12_descriptor_heap *heap); static inline HRESULT d3d12_device_query_interface(struct d3d12_device *device, REFIID iid, void **object) { - return ID3D12Device7_QueryInterface(&device->ID3D12Device7_iface, iid, object); + return ID3D12Device8_QueryInterface(&device->ID3D12Device8_iface, iid, object); } static inline ULONG d3d12_device_add_ref(struct d3d12_device *device) { - return ID3D12Device7_AddRef(&device->ID3D12Device7_iface); + return ID3D12Device8_AddRef(&device->ID3D12Device8_iface); } static inline ULONG d3d12_device_release(struct d3d12_device *device) { - return ID3D12Device7_Release(&device->ID3D12Device7_iface); + return ID3D12Device8_Release(&device->ID3D12Device8_iface); } static inline unsigned int d3d12_device_get_descriptor_handle_increment_size(struct d3d12_device *device, D3D12_DESCRIPTOR_HEAP_TYPE descriptor_type) { - return 
ID3D12Device7_GetDescriptorHandleIncrementSize(&device->ID3D12Device7_iface, descriptor_type); + return ID3D12Device8_GetDescriptorHandleIncrementSize(&device->ID3D12Device8_iface, descriptor_type); } /* utils */ -- 2.43.0