wine-staging/patches/vkd3d-latest/0007-Updated-vkd3d-to-756b98f093ba26e8cd4d4fed1caa04a5c0d.patch

From a38de601ec795892cf1b281f11f4320c65518774 Mon Sep 17 00:00:00 2001
From: Alistair Leslie-Hughes <leslie_alistair@hotmail.com>
Date: Thu, 7 Nov 2024 12:23:53 +1100
Subject: [PATCH] Updated vkd3d to 756b98f093ba26e8cd4d4fed1caa04a5c0d0bc35.
---
libs/vkd3d/libs/vkd3d-shader/fx.c | 47 +-
libs/vkd3d/libs/vkd3d-shader/glsl.c | 133 +-
libs/vkd3d/libs/vkd3d-shader/hlsl.c | 6 +
libs/vkd3d/libs/vkd3d-shader/hlsl.l | 1 +
libs/vkd3d/libs/vkd3d-shader/hlsl.y | 113 +-
libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c | 954 ++++++++++++-
libs/vkd3d/libs/vkd3d-shader/ir.c | 25 +-
libs/vkd3d/libs/vkd3d-shader/spirv.c | 11 +-
libs/vkd3d/libs/vkd3d-shader/tpf.c | 1182 +++--------------
.../libs/vkd3d-shader/vkd3d_shader_private.h | 1 +
libs/vkd3d/libs/vkd3d/command.c | 30 +-
11 files changed, 1392 insertions(+), 1111 deletions(-)
diff --git a/libs/vkd3d/libs/vkd3d-shader/fx.c b/libs/vkd3d/libs/vkd3d-shader/fx.c
index 5382dd94f98..9b1ef3bb2e0 100644
--- a/libs/vkd3d/libs/vkd3d-shader/fx.c
+++ b/libs/vkd3d/libs/vkd3d-shader/fx.c
@@ -514,6 +514,8 @@ enum fx_4_type_constants
FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY = 0xf,
FX_4_OBJECT_TYPE_TEXTURE_3D = 0x10,
FX_4_OBJECT_TYPE_TEXTURE_CUBE = 0x11,
+ FX_4_OBJECT_TYPE_RTV = 0x13,
+ FX_4_OBJECT_TYPE_DSV = 0x14,
FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY = 0x17,
FX_5_OBJECT_TYPE_GEOMETRY_SHADER = 0x1b,
@@ -527,7 +529,12 @@ enum fx_4_type_constants
FX_5_OBJECT_TYPE_UAV_2DARRAY = 0x22,
FX_5_OBJECT_TYPE_UAV_3D = 0x23,
FX_5_OBJECT_TYPE_UAV_BUFFER = 0x24,
+ FX_5_OBJECT_TYPE_SRV_RAW_BUFFER = 0x25,
+ FX_5_OBJECT_TYPE_UAV_RAW_BUFFER = 0x26,
+ FX_5_OBJECT_TYPE_SRV_STRUCTURED_BUFFER = 0x27,
FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER = 0x28,
+ FX_5_OBJECT_TYPE_SRV_APPEND_STRUCTURED_BUFFER = 0x2b,
+ FX_5_OBJECT_TYPE_SRV_CONSUME_STRUCTURED_BUFFER = 0x2c,
/* Types */
FX_4_TYPE_CLASS_NUMERIC = 1,
@@ -613,6 +620,7 @@ static const char * get_fx_4_type_name(const struct hlsl_type *type)
[HLSL_SAMPLER_DIM_3D] = "RWTexture3D",
[HLSL_SAMPLER_DIM_BUFFER] = "RWBuffer",
[HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = "RWStructuredBuffer",
+ [HLSL_SAMPLER_DIM_RAW_BUFFER] = "RWByteAddressBuffer",
};
switch (type->class)
@@ -821,17 +829,18 @@ static uint32_t write_fx_4_type(const struct hlsl_type *type, struct fx_write_co
[HLSL_SAMPLER_DIM_3D] = FX_5_OBJECT_TYPE_UAV_3D,
[HLSL_SAMPLER_DIM_BUFFER] = FX_5_OBJECT_TYPE_UAV_BUFFER,
[HLSL_SAMPLER_DIM_STRUCTURED_BUFFER] = FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER,
+ [HLSL_SAMPLER_DIM_RAW_BUFFER] = FX_5_OBJECT_TYPE_UAV_RAW_BUFFER,
};
put_u32_unaligned(buffer, uav_type[element_type->sampler_dim]);
}
else if (element_type->class == HLSL_CLASS_DEPTH_STENCIL_VIEW)
{
- put_u32_unaligned(buffer, 20);
+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_DSV);
}
else if (element_type->class == HLSL_CLASS_RENDER_TARGET_VIEW)
{
- put_u32_unaligned(buffer, 19);
+ put_u32_unaligned(buffer, FX_4_OBJECT_TYPE_RTV);
}
else if (element_type->class == HLSL_CLASS_PIXEL_SHADER)
{
@@ -3315,27 +3324,19 @@ static void fx_4_parse_shader_initializer(struct fx_parser *parser, unsigned int
vkd3d_shader_free_shader_code(&output);
}
-static bool fx_4_is_shader_resource(const struct fx_4_binary_type *type)
+static bool fx_4_object_has_initializer(const struct fx_4_binary_type *type)
{
switch (type->typeinfo)
{
- case FX_4_OBJECT_TYPE_TEXTURE:
- case FX_4_OBJECT_TYPE_TEXTURE_1D:
- case FX_4_OBJECT_TYPE_TEXTURE_1DARRAY:
- case FX_4_OBJECT_TYPE_TEXTURE_2D:
- case FX_4_OBJECT_TYPE_TEXTURE_2DARRAY:
- case FX_4_OBJECT_TYPE_TEXTURE_2DMS:
- case FX_4_OBJECT_TYPE_TEXTURE_2DMSARRAY:
- case FX_4_OBJECT_TYPE_TEXTURE_3D:
- case FX_4_OBJECT_TYPE_TEXTURE_CUBE:
- case FX_4_OBJECT_TYPE_TEXTURE_CUBEARRAY:
- case FX_5_OBJECT_TYPE_UAV_1D:
- case FX_5_OBJECT_TYPE_UAV_1DARRAY:
- case FX_5_OBJECT_TYPE_UAV_2D:
- case FX_5_OBJECT_TYPE_UAV_2DARRAY:
- case FX_5_OBJECT_TYPE_UAV_3D:
- case FX_5_OBJECT_TYPE_UAV_BUFFER:
- case FX_5_OBJECT_TYPE_UAV_STRUCTURED_BUFFER:
+ case FX_4_OBJECT_TYPE_STRING:
+ case FX_4_OBJECT_TYPE_PIXEL_SHADER:
+ case FX_4_OBJECT_TYPE_VERTEX_SHADER:
+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER:
+ case FX_4_OBJECT_TYPE_GEOMETRY_SHADER_SO:
+ case FX_5_OBJECT_TYPE_GEOMETRY_SHADER:
+ case FX_5_OBJECT_TYPE_COMPUTE_SHADER:
+ case FX_5_OBJECT_TYPE_HULL_SHADER:
+ case FX_5_OBJECT_TYPE_DOMAIN_SHADER:
return true;
default:
return false;
@@ -3347,6 +3348,9 @@ static void fx_4_parse_object_initializer(struct fx_parser *parser, const struct
unsigned int i, element_count;
uint32_t value;
+ if (!fx_4_object_has_initializer(type))
+ return;
+
vkd3d_string_buffer_printf(&parser->buffer, " = {\n");
element_count = max(type->element_count, 1);
for (i = 0; i < element_count; ++i)
@@ -3407,8 +3411,7 @@ static void fx_4_parse_objects(struct fx_parser *parser)
if (type.element_count)
vkd3d_string_buffer_printf(&parser->buffer, "[%u]", type.element_count);
- if (!fx_4_is_shader_resource(&type))
- fx_4_parse_object_initializer(parser, &type);
+ fx_4_parse_object_initializer(parser, &type);
vkd3d_string_buffer_printf(&parser->buffer, ";\n");
fx_parse_fx_4_annotations(parser);
diff --git a/libs/vkd3d/libs/vkd3d-shader/glsl.c b/libs/vkd3d/libs/vkd3d-shader/glsl.c
index 363054cb6d9..0df0e30f399 100644
--- a/libs/vkd3d/libs/vkd3d-shader/glsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/glsl.c
@@ -763,16 +763,37 @@ static void shader_glsl_default(struct vkd3d_glsl_generator *gen)
vkd3d_string_buffer_printf(gen->buffer, "default:\n");
}
+static void shader_glsl_print_texel_offset(struct vkd3d_string_buffer *buffer, struct vkd3d_glsl_generator *gen,
+ unsigned int offset_size, const struct vkd3d_shader_texel_offset *offset)
+{
+ switch (offset_size)
+ {
+ case 1:
+ vkd3d_string_buffer_printf(buffer, "%d", offset->u);
+ break;
+ case 2:
+ vkd3d_string_buffer_printf(buffer, "ivec2(%d, %d)", offset->u, offset->v);
+ break;
+ default:
+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
+ "Internal compiler error: Invalid texel offset size %u.", offset_size);
+ /* fall through */
+ case 3:
+ vkd3d_string_buffer_printf(buffer, "ivec3(%d, %d, %d)", offset->u, offset->v, offset->w);
+ break;
+ }
+}
+
static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
{
+ unsigned int resource_id, resource_idx, resource_space, sample_count;
const struct glsl_resource_type_info *resource_type_info;
- unsigned int resource_id, resource_idx, resource_space;
const struct vkd3d_shader_descriptor_info1 *d;
enum vkd3d_shader_component_type sampled_type;
enum vkd3d_shader_resource_type resource_type;
struct vkd3d_string_buffer *fetch;
enum vkd3d_data_type data_type;
- struct glsl_src coord, lod;
+ struct glsl_src coord;
struct glsl_dst dst;
uint32_t coord_mask;
@@ -790,6 +811,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
{
resource_type = d->resource_type;
resource_space = d->register_space;
+ sample_count = d->sample_count;
sampled_type = vkd3d_component_type_from_resource_data_type(d->resource_data_type);
data_type = vkd3d_data_type_from_component_type(sampled_type);
}
@@ -799,6 +821,7 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
"Internal compiler error: Undeclared resource descriptor %u.", resource_id);
resource_space = 0;
resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
+ sample_count = 1;
data_type = VKD3D_DATA_FLOAT;
}
@@ -815,7 +838,6 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
glsl_dst_init(&dst, gen, ins, &ins->dst[0]);
glsl_src_init(&coord, gen, &ins->src[0], coord_mask);
- glsl_src_init(&lod, gen, &ins->src[0], VKD3DSP_WRITEMASK_3);
fetch = vkd3d_string_buffer_get(&gen->string_buffers);
vkd3d_string_buffer_printf(fetch, "texelFetch(");
@@ -823,14 +845,23 @@ static void shader_glsl_ld(struct vkd3d_glsl_generator *gen, const struct vkd3d_
resource_space, VKD3D_SHADER_DUMMY_SAMPLER_INDEX, 0);
vkd3d_string_buffer_printf(fetch, ", %s", coord.str->buffer);
if (resource_type != VKD3D_SHADER_RESOURCE_BUFFER)
- vkd3d_string_buffer_printf(fetch, ", %s", lod.str->buffer);
+ {
+ vkd3d_string_buffer_printf(fetch, ", ");
+ if (ins->opcode != VKD3DSIH_LD2DMS)
+ shader_glsl_print_src(fetch, gen, &ins->src[0], VKD3DSP_WRITEMASK_3, ins->src[0].reg.data_type);
+ else if (sample_count == 1)
+ /* If the resource isn't a true multisample resource, this is the
+ * "lod" parameter instead of the "sample" parameter. */
+ vkd3d_string_buffer_printf(fetch, "0");
+ else
+ shader_glsl_print_src(fetch, gen, &ins->src[2], VKD3DSP_WRITEMASK_0, ins->src[2].reg.data_type);
+ }
vkd3d_string_buffer_printf(fetch, ")");
shader_glsl_print_swizzle(fetch, ins->src[1].swizzle, ins->dst[0].write_mask);
shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", fetch->buffer);
vkd3d_string_buffer_release(&gen->string_buffers, fetch);
- glsl_src_cleanup(&lod, &gen->string_buffers);
glsl_src_cleanup(&coord, &gen->string_buffers);
glsl_dst_cleanup(&dst, &gen->string_buffers);
}
@@ -868,8 +899,9 @@ static void shader_glsl_print_shadow_coord(struct vkd3d_string_buffer *buffer, s
static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vkd3d_shader_instruction *ins)
{
- bool shadow_sampler, array, bias, gather, grad, lod, lod_zero, shadow;
+ bool shadow_sampler, array, bias, dynamic_offset, gather, grad, lod, lod_zero, offset, shadow;
const struct glsl_resource_type_info *resource_type_info;
+ const struct vkd3d_shader_src_param *resource, *sampler;
unsigned int resource_id, resource_idx, resource_space;
unsigned int sampler_id, sampler_idx, sampler_space;
const struct vkd3d_shader_descriptor_info1 *d;
@@ -881,23 +913,24 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
struct glsl_dst dst;
bias = ins->opcode == VKD3DSIH_SAMPLE_B;
- gather = ins->opcode == VKD3DSIH_GATHER4;
+ dynamic_offset = ins->opcode == VKD3DSIH_GATHER4_PO;
+ gather = ins->opcode == VKD3DSIH_GATHER4 || ins->opcode == VKD3DSIH_GATHER4_PO;
grad = ins->opcode == VKD3DSIH_SAMPLE_GRAD;
lod = ins->opcode == VKD3DSIH_SAMPLE_LOD || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
lod_zero = ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
+ offset = dynamic_offset || vkd3d_shader_instruction_has_texel_offset(ins);
shadow = ins->opcode == VKD3DSIH_SAMPLE_C || ins->opcode == VKD3DSIH_SAMPLE_C_LZ;
- if (vkd3d_shader_instruction_has_texel_offset(ins))
- vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
- "Internal compiler error: Unhandled texel sample offset.");
+ resource = &ins->src[1 + dynamic_offset];
+ sampler = &ins->src[2 + dynamic_offset];
- if (ins->src[1].reg.idx[0].rel_addr || ins->src[1].reg.idx[1].rel_addr
- || ins->src[2].reg.idx[0].rel_addr || ins->src[2].reg.idx[1].rel_addr)
+ if (resource->reg.idx[0].rel_addr || resource->reg.idx[1].rel_addr
+ || sampler->reg.idx[0].rel_addr || sampler->reg.idx[1].rel_addr)
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
"Descriptor indexing is not supported.");
- resource_id = ins->src[1].reg.idx[0].offset;
- resource_idx = ins->src[1].reg.idx[1].offset;
+ resource_id = resource->reg.idx[0].offset;
+ resource_idx = resource->reg.idx[1].offset;
if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SRV, resource_id)))
{
resource_type = d->resource_type;
@@ -927,8 +960,8 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
array = false;
}
- sampler_id = ins->src[2].reg.idx[0].offset;
- sampler_idx = ins->src[2].reg.idx[1].offset;
+ sampler_id = sampler->reg.idx[0].offset;
+ sampler_idx = sampler->reg.idx[1].offset;
if ((d = shader_glsl_get_descriptor_by_id(gen, VKD3D_SHADER_DESCRIPTOR_TYPE_SAMPLER, sampler_id)))
{
sampler_space = d->register_space;
@@ -958,13 +991,14 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
sample = vkd3d_string_buffer_get(&gen->string_buffers);
if (gather)
- vkd3d_string_buffer_printf(sample, "textureGather(");
+ vkd3d_string_buffer_printf(sample, "textureGather");
else if (grad)
- vkd3d_string_buffer_printf(sample, "textureGrad(");
+ vkd3d_string_buffer_printf(sample, "textureGrad");
else if (lod)
- vkd3d_string_buffer_printf(sample, "textureLod(");
+ vkd3d_string_buffer_printf(sample, "textureLod");
else
- vkd3d_string_buffer_printf(sample, "texture(");
+ vkd3d_string_buffer_printf(sample, "texture");
+ vkd3d_string_buffer_printf(sample, "%s(", offset ? "Offset" : "");
shader_glsl_print_combined_sampler_name(sample, gen, resource_idx, resource_space, sampler_idx, sampler_space);
vkd3d_string_buffer_printf(sample, ", ");
if (shadow)
@@ -985,18 +1019,32 @@ static void shader_glsl_sample(struct vkd3d_glsl_generator *gen, const struct vk
{
vkd3d_string_buffer_printf(sample, ", 0.0");
}
- else if (bias || lod)
+ else if (lod)
{
vkd3d_string_buffer_printf(sample, ", ");
shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
}
- if (gather)
+ if (offset)
+ {
+ vkd3d_string_buffer_printf(sample, ", ");
+ if (dynamic_offset)
+ shader_glsl_print_src(sample, gen, &ins->src[1],
+ vkd3d_write_mask_from_component_count(coord_size - array), ins->src[1].reg.data_type);
+ else
+ shader_glsl_print_texel_offset(sample, gen, coord_size - array, &ins->texel_offset);
+ }
+ if (bias)
{
- if ((component_idx = vsir_swizzle_get_component(ins->src[2].swizzle, 0)))
+ vkd3d_string_buffer_printf(sample, ", ");
+ shader_glsl_print_src(sample, gen, &ins->src[3], VKD3DSP_WRITEMASK_0, ins->src[3].reg.data_type);
+ }
+ else if (gather)
+ {
+ if ((component_idx = vsir_swizzle_get_component(sampler->swizzle, 0)))
vkd3d_string_buffer_printf(sample, ", %d", component_idx);
}
vkd3d_string_buffer_printf(sample, ")");
- shader_glsl_print_swizzle(sample, ins->src[1].swizzle, ins->dst[0].write_mask);
+ shader_glsl_print_swizzle(sample, resource->swizzle, ins->dst[0].write_mask);
shader_glsl_print_assignment_ext(gen, &dst, data_type, "%s", sample->buffer);
@@ -1268,7 +1316,13 @@ static void shader_glsl_print_sysval_name(struct vkd3d_string_buffer *buffer, st
"Internal compiler error: Unhandled SV_IS_FRONT_FACE in shader type #%x.", version->type);
vkd3d_string_buffer_printf(buffer,
"uintBitsToFloat(uvec4(gl_FrontFacing ? 0xffffffffu : 0u, 0u, 0u, 0u))");
+ break;
+ case VKD3D_SHADER_SV_SAMPLE_INDEX:
+ if (version->type != VKD3D_SHADER_TYPE_PIXEL)
+ vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
+ "Internal compiler error: Unhandled SV_SAMPLE_INDEX in shader type #%x.", version->type);
+ vkd3d_string_buffer_printf(buffer, "intBitsToFloat(ivec4(gl_SampleID, 0, 0, 0))");
break;
case VKD3D_SHADER_SV_TARGET:
@@ -1390,6 +1444,9 @@ static void shader_glsl_shader_epilogue(struct vkd3d_glsl_generator *gen)
case VKD3D_SHADER_COMPONENT_UINT:
vkd3d_string_buffer_printf(buffer, " = floatBitsToUint(%s_out[%u])", gen->prefix, e->register_index);
break;
+ case VKD3D_SHADER_COMPONENT_INT:
+ vkd3d_string_buffer_printf(buffer, " = floatBitsToInt(%s_out[%u])", gen->prefix, e->register_index);
+ break;
default:
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
"Internal compiler error: Unhandled output component type %#x.", e->component_type);
@@ -1499,6 +1556,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
shader_glsl_cast(gen, ins, "uint", "uvec");
break;
case VKD3DSIH_GATHER4:
+ case VKD3DSIH_GATHER4_PO:
case VKD3DSIH_SAMPLE:
case VKD3DSIH_SAMPLE_B:
case VKD3DSIH_SAMPLE_C:
@@ -1553,6 +1611,7 @@ static void vkd3d_glsl_handle_instruction(struct vkd3d_glsl_generator *gen,
shader_glsl_cast(gen, ins, "float", "vec");
break;
case VKD3DSIH_LD:
+ case VKD3DSIH_LD2DMS:
shader_glsl_ld(gen, ins);
break;
case VKD3DSIH_LD_UAV_TYPED:
@@ -1911,6 +1970,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator
struct vkd3d_string_buffer *buffer = gen->buffer;
enum vkd3d_shader_component_type component_type;
const char *sampler_type, *sampler_type_prefix;
+ enum vkd3d_shader_resource_type resource_type;
unsigned int binding_idx;
bool shadow = false;
@@ -1936,18 +1996,32 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator
return;
}
- if ((resource_type_info = shader_glsl_get_resource_type_info(srv->resource_type)))
+ resource_type = srv->resource_type;
+ if (srv->sample_count == 1)
+ {
+ /* The OpenGL API distinguishes between multi-sample textures with
+ * sample count 1 and single-sample textures. Direct3D and Vulkan
+ * don't make this distinction at the API level, but Direct3D shaders
+ * are capable of expressing both. We therefore map such multi-sample
+ * textures to their single-sample equivalents here. */
+ if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMS)
+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
+ else if (resource_type == VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY)
+ resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
+ }
+
+ if ((resource_type_info = shader_glsl_get_resource_type_info(resource_type)))
{
sampler_type = resource_type_info->type_suffix;
if (shadow && !resource_type_info->shadow)
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_UNSUPPORTED,
- "Comparison samplers are not supported with resource type %#x.", srv->resource_type);
+ "Comparison samplers are not supported with resource type %#x.", resource_type);
}
else
{
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_INTERNAL,
"Internal compiler error: Unhandled resource type %#x for combined resource/sampler "
- "for resource %u, space %u and sampler %u, space %u.", srv->resource_type,
+ "for resource %u, space %u and sampler %u, space %u.", resource_type,
crs->resource_index, crs->resource_space, crs->sampler_index, crs->sampler_space);
sampler_type = "<unhandled sampler type>";
}
@@ -1972,7 +2046,7 @@ static void shader_glsl_generate_sampler_declaration(struct vkd3d_glsl_generator
break;
}
- if (!shader_glsl_get_combined_sampler_binding(gen, crs, srv->resource_type, &binding_idx))
+ if (!shader_glsl_get_combined_sampler_binding(gen, crs, resource_type, &binding_idx))
{
vkd3d_glsl_compiler_error(gen, VKD3D_SHADER_ERROR_GLSL_BINDING_NOT_FOUND,
"No descriptor binding specified for combined resource/sampler "
@@ -2213,6 +2287,9 @@ static void shader_glsl_generate_output_declarations(struct vkd3d_glsl_generator
case VKD3D_SHADER_COMPONENT_UINT:
vkd3d_string_buffer_printf(buffer, "uvec4");
break;
+ case VKD3D_SHADER_COMPONENT_INT:
+ vkd3d_string_buffer_printf(buffer, "ivec4");
+ break;
case VKD3D_SHADER_COMPONENT_FLOAT:
vkd3d_string_buffer_printf(buffer, "vec4");
break;
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.c b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
index 1f90a4ba805..96de18dc886 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.c
@@ -2774,6 +2774,12 @@ struct vkd3d_string_buffer *hlsl_type_to_string(struct hlsl_ctx *ctx, const stru
return string;
case HLSL_CLASS_TEXTURE:
+ if (type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
+ {
+ vkd3d_string_buffer_printf(string, "ByteAddressBuffer");
+ return string;
+ }
+
if (type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC)
{
vkd3d_string_buffer_printf(string, "Texture");
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.l b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
index 18effcc5be1..8dace11916a 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.l
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.l
@@ -74,6 +74,7 @@ ANY (.)
BlendState {return KW_BLENDSTATE; }
break {return KW_BREAK; }
Buffer {return KW_BUFFER; }
+ByteAddressBuffer {return KW_BYTEADDRESSBUFFER; }
case {return KW_CASE; }
cbuffer {return KW_CBUFFER; }
centroid {return KW_CENTROID; }
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl.y b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
index dcbba46ede6..60aade732db 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl.y
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl.y
@@ -53,7 +53,7 @@ struct parse_parameter
struct parse_initializer initializer;
};
-struct parse_colon_attribute
+struct parse_colon_attributes
{
struct hlsl_semantic semantic;
struct hlsl_reg_reservation reg_reservation;
@@ -5175,6 +5175,10 @@ static bool intrinsic_GetRenderTargetSampleCount(struct hlsl_ctx *ctx,
struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
struct hlsl_ir_node *expr;
+ if (ctx->profile->type != VKD3D_SHADER_TYPE_PIXEL || hlsl_version_lt(ctx, 4, 1))
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
+ "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher.");
+
if (!(expr = hlsl_new_expr(ctx, HLSL_OP0_RASTERIZER_SAMPLE_COUNT,
operands, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)))
return false;
@@ -5599,6 +5603,55 @@ static bool raise_invalid_method_object_type(struct hlsl_ctx *ctx, const struct
return false;
}
+static bool add_raw_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
+ const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
+{
+ struct hlsl_resource_load_params load_params = {.type = HLSL_RESOURCE_LOAD};
+ struct hlsl_ir_node *load;
+ unsigned int value_dim;
+
+ if (params->args_count != 1 && params->args_count != 2)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
+ "Wrong number of arguments to method 'Load': expected between 1 and 2, but got %u.",
+ params->args_count);
+ return false;
+ }
+
+ if (params->args_count == 2)
+ {
+ hlsl_fixme(ctx, loc, "Tiled resource status argument.");
+ return false;
+ }
+
+ if (params->args[0]->data_type->class != HLSL_CLASS_SCALAR)
+ {
+ hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Scalar address argument expected for '%s'.", name);
+ return false;
+ }
+
+ if (!strcmp(name, "Load"))
+ value_dim = 1;
+ else if (!strcmp(name, "Load2"))
+ value_dim = 2;
+ else if (!strcmp(name, "Load3"))
+ value_dim = 3;
+ else
+ value_dim = 4;
+
+ if (!(load_params.coords = add_implicit_conversion(ctx, block, params->args[0],
+ hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc)))
+ return false;
+
+ load_params.format = hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, value_dim);
+ load_params.resource = object;
+
+ if (!(load = hlsl_new_resource_load(ctx, &load_params, loc)))
+ return false;
+ hlsl_block_add_instr(block, load);
+ return true;
+}
+
static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_node *object,
const char *name, const struct parse_initializer *params, const struct vkd3d_shader_location *loc)
{
@@ -5608,6 +5661,9 @@ static bool add_load_method_call(struct hlsl_ctx *ctx, struct hlsl_block *block,
struct hlsl_ir_node *load;
bool multisampled;
+ if (object_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
+ return add_raw_load_method_call(ctx, block, object, name, params, loc);
+
if (object_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
{
hlsl_fixme(ctx, loc, "Method '%s' for structured buffers.", name);
@@ -6260,7 +6316,10 @@ texture_methods[] =
{ "GetDimensions", add_getdimensions_method_call, "00111111111110" },
- { "Load", add_load_method_call, "00111011110110" },
+ { "Load", add_load_method_call, "00111011110111" },
+ { "Load2", add_raw_load_method_call, "00000000000001" },
+ { "Load3", add_raw_load_method_call, "00000000000001" },
+ { "Load4", add_raw_load_method_call, "00000000000001" },
{ "Sample", add_sample_method_call, "00111111001000" },
{ "SampleBias", add_sample_lod_method_call, "00111111001000" },
@@ -6490,7 +6549,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
struct parse_if_body if_body;
enum parse_assign_op assign_op;
struct hlsl_reg_reservation reg_reservation;
- struct parse_colon_attribute colon_attribute;
+ struct parse_colon_attributes colon_attributes;
struct hlsl_semantic semantic;
enum hlsl_buffer_type buffer_type;
enum hlsl_sampler_dim sampler_dim;
@@ -6505,6 +6564,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%token KW_BLENDSTATE
%token KW_BREAK
%token KW_BUFFER
+%token KW_BYTEADDRESSBUFFER
%token KW_CASE
%token KW_CONSTANTBUFFER
%token KW_CBUFFER
@@ -6687,7 +6747,7 @@ static void validate_uav_type(struct hlsl_ctx *ctx, enum hlsl_sampler_dim dim,
%type <buffer_type> buffer_type
-%type <colon_attribute> colon_attribute
+%type <colon_attributes> colon_attributes
%type <fields> field
%type <fields> fields_list
@@ -6875,7 +6935,7 @@ effect_group:
}
buffer_declaration:
- var_modifiers buffer_type any_identifier colon_attribute annotations_opt
+ var_modifiers buffer_type any_identifier colon_attributes annotations_opt
{
if ($4.semantic.name)
hlsl_error(ctx, &@4, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC, "Semantics are not allowed on buffers.");
@@ -7200,7 +7260,7 @@ func_declaration:
func_prototype_no_attrs:
/* var_modifiers is necessary to avoid shift/reduce conflicts. */
- var_modifiers type var_identifier '(' parameters ')' colon_attribute
+ var_modifiers type var_identifier '(' parameters ')' colon_attributes
{
uint32_t modifiers = $1;
struct hlsl_ir_var *var;
@@ -7377,28 +7437,39 @@ var_identifier:
VAR_IDENTIFIER
| NEW_IDENTIFIER
-colon_attribute:
+colon_attributes:
%empty
{
$$.semantic = (struct hlsl_semantic){0};
$$.reg_reservation.reg_type = 0;
$$.reg_reservation.offset_type = 0;
}
- | semantic
+ | colon_attributes semantic
{
- $$.semantic = $1;
- $$.reg_reservation.reg_type = 0;
- $$.reg_reservation.offset_type = 0;
+ hlsl_cleanup_semantic(&$$.semantic);
+ $$.semantic = $2;
}
- | register_reservation
+ | colon_attributes register_reservation
{
- $$.semantic = (struct hlsl_semantic){0};
- $$.reg_reservation = $1;
+ if ($$.reg_reservation.reg_type)
+ hlsl_fixme(ctx, &@2, "Multiple register() reservations.");
+
+ $$.reg_reservation.reg_type = $2.reg_type;
+ $$.reg_reservation.reg_index = $2.reg_index;
+ $$.reg_reservation.reg_space = $2.reg_space;
}
- | packoffset_reservation
+ | colon_attributes packoffset_reservation
{
- $$.semantic = (struct hlsl_semantic){0};
- $$.reg_reservation = $1;
+ if (ctx->cur_buffer == ctx->globals_buffer)
+ {
+ hlsl_error(ctx, &@2, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
+ "The packoffset() reservation is only allowed within 'cbuffer' blocks.");
+ }
+ else
+ {
+ $$.reg_reservation.offset_type = $2.offset_type;
+ $$.reg_reservation.offset_index = $2.offset_index;
+ }
}
semantic:
@@ -7594,7 +7665,7 @@ parameter:
}
parameter_decl:
- var_modifiers type_no_void any_identifier arrays colon_attribute
+ var_modifiers type_no_void any_identifier arrays colon_attributes
{
uint32_t modifiers = $1;
struct hlsl_type *type;
@@ -7863,6 +7934,10 @@ type_no_void:
$$ = hlsl_new_texture_type(ctx, $1, $3, sample_count);
}
+ | KW_BYTEADDRESSBUFFER
+ {
+ $$ = hlsl_new_texture_type(ctx, HLSL_SAMPLER_DIM_RAW_BUFFER, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), 0);
+ }
| uav_type '<' resource_format '>'
{
validate_uav_type(ctx, $1, $3, &@3);
@@ -8095,7 +8170,7 @@ variables_def_typed:
}
variable_decl:
- any_identifier arrays colon_attribute annotations_opt
+ any_identifier arrays colon_attributes annotations_opt
{
$$ = hlsl_alloc(ctx, sizeof(*$$));
$$->loc = @1;
diff --git a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
index 213e403dcbd..bea16fd4da6 100644
--- a/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
+++ b/libs/vkd3d/libs/vkd3d-shader/hlsl_codegen.c
@@ -6520,6 +6520,21 @@ static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_prog
}
mask = (1 << var->data_type->dimx) - 1;
+
+ if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output
+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX)
+ {
+ if (var->data_type->dimx > 1)
+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
+ "PSIZE output must have only 1 component in this shader model.");
+ /* For some reason the writemask has all components set. */
+ mask = VKD3DSP_WRITEMASK_ALL;
+ }
+ if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3
+ && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->dimx > 1)
+ hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
+ "FOG output must have only 1 component in this shader model.");
+
use_mask = mask; /* FIXME: retrieve use mask accurately. */
component_type = VKD3D_SHADER_COMPONENT_FLOAT;
}
@@ -6817,6 +6832,16 @@ static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
}
}
+static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
+ struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
+{
+ VKD3D_ASSERT(instr->reg.allocated);
+ vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
+ dst->reg.idx[0].offset = instr->reg.id;
+ dst->reg.dimension = VSIR_DIMENSION_VEC4;
+ dst->write_mask = instr->reg.writemask;
+}
+
static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
struct vsir_program *program, struct hlsl_ir_constant *constant)
{
@@ -6842,6 +6867,25 @@ static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
dst_param->write_mask = instr->reg.writemask;
}
+static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_expr *expr)
+{
+ struct vkd3d_shader_src_param *src_param;
+ struct hlsl_ir_node *instr = &expr->node;
+ struct vkd3d_shader_instruction *ins;
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1)))
+ return;
+ ins->flags = VKD3DSI_SAMPLE_INFO_UINT;
+
+ vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
+
+ src_param = &ins->src[0];
+ vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0);
+ src_param->reg.dimension = VSIR_DIMENSION_VEC4;
+ src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
+}
+
/* Translate ops that can be mapped to a single vsir instruction with only one dst register. */
static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx,
struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode,
@@ -6866,10 +6910,7 @@ static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx,
return;
dst_param = &ins->dst[0];
- vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
- dst_param->reg.idx[0].offset = instr->reg.id;
- dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
- dst_param->write_mask = instr->reg.writemask;
+ vsir_dst_from_hlsl_node(dst_param, ctx, instr);
dst_param->modifiers = dst_mod;
for (i = 0; i < src_count; ++i)
@@ -7216,6 +7257,8 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
if (deref->var->is_output_semantic)
{
+ const char *semantic_name = deref->var->semantic.name;
+
version.major = ctx->profile->major_version;
version.minor = ctx->profile->minor_version;
version.type = ctx->profile->type;
@@ -7225,7 +7268,7 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
type = VKD3DSPR_TEMP;
register_index = 0;
}
- else if (!sm1_register_from_semantic_name(&version, deref->var->semantic.name,
+ else if (!sm1_register_from_semantic_name(&version, semantic_name,
deref->var->semantic.index, true, &type, &register_index))
{
VKD3D_ASSERT(reg.allocated);
@@ -7234,6 +7277,14 @@ static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
}
else
writemask = (1u << deref->var->data_type->dimx) - 1;
+
+ if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE")
+ || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3)))
+ {
+ /* These are always 1-component, but for some reason are written
+ * with a writemask containing all components. */
+ writemask = VKD3DSP_WRITEMASK_ALL;
+ }
}
else
VKD3D_ASSERT(reg.allocated);
@@ -7642,6 +7693,123 @@ static void replace_instr_with_last_vsir_instr(struct hlsl_ctx *ctx,
hlsl_replace_node(instr, vsir_instr);
}
+static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program,
+ const struct hlsl_ir_var *var, bool is_patch_constant_func, struct hlsl_block *block,
+ const struct vkd3d_shader_location *loc)
+{
+ const struct vkd3d_shader_version *version = &program->shader_version;
+ const bool output = var->is_output_semantic;
+ enum vkd3d_shader_sysval_semantic semantic;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct vkd3d_shader_instruction *ins;
+ enum vkd3d_shader_register_type type;
+ enum vkd3d_shader_opcode opcode;
+ uint32_t write_mask;
+ unsigned int idx;
+ bool has_idx;
+
+ sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping,
+ ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func);
+ if (semantic == ~0u)
+ semantic = VKD3D_SHADER_SV_NONE;
+
+ if (var->is_input_semantic)
+ {
+ switch (semantic)
+ {
+ case VKD3D_SHADER_SV_NONE:
+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
+ ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT;
+ break;
+
+ case VKD3D_SHADER_SV_INSTANCE_ID:
+ case VKD3D_SHADER_SV_IS_FRONT_FACE:
+ case VKD3D_SHADER_SV_PRIMITIVE_ID:
+ case VKD3D_SHADER_SV_SAMPLE_INDEX:
+ case VKD3D_SHADER_SV_VERTEX_ID:
+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
+ ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV;
+ break;
+
+ default:
+ opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
+ ? VKD3DSIH_DCL_INPUT_PS_SIV : VKD3DSIH_DCL_INPUT_SIV;
+ break;
+ }
+ }
+ else
+ {
+ if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL)
+ opcode = VKD3DSIH_DCL_OUTPUT;
+ else
+ opcode = VKD3DSIH_DCL_OUTPUT_SIV;
+ }
+
+ if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx))
+ {
+ if (has_idx)
+ idx = var->semantic.index;
+ write_mask = (1u << var->data_type->dimx) - 1;
+ }
+ else
+ {
+ if (output)
+ type = VKD3DSPR_OUTPUT;
+ else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
+ type = VKD3DSPR_PATCHCONST;
+ else
+ type = VKD3DSPR_INPUT;
+
+ has_idx = true;
+ idx = var->regs[HLSL_REGSET_NUMERIC].id;
+ write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0)))
+ return;
+
+ if (opcode == VKD3DSIH_DCL_OUTPUT)
+ {
+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE
+ || semantic == VKD3D_SHADER_SV_TARGET || type != VKD3DSPR_OUTPUT);
+ dst_param = &ins->declaration.dst;
+ }
+ else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS)
+ {
+ VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE);
+ dst_param = &ins->declaration.dst;
+ }
+ else
+ {
+ VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE);
+ ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic,
+ var->semantic.index);
+ dst_param = &ins->declaration.register_semantic.reg;
+ }
+
+ if (has_idx)
+ {
+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
+ dst_param->reg.idx[0].offset = idx;
+ }
+ else
+ {
+ vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0);
+ }
+
+ if (shader_sm4_is_scalar_register(&dst_param->reg))
+ dst_param->reg.dimension = VSIR_DIMENSION_SCALAR;
+ else
+ dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
+
+ dst_param->write_mask = write_mask;
+
+ if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL)
+ ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
+
+ add_last_vsir_instr_to_block(ctx, program, block);
+}
+
static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program,
uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc)
{
@@ -7674,76 +7842,754 @@ static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
add_last_vsir_instr_to_block(ctx, program, block);
}
-static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
- struct vsir_program *program, struct hlsl_ir_expr *expr)
+static bool type_is_float(const struct hlsl_type *type)
{
- switch (expr->op)
- {
- case HLSL_OP1_ABS:
- generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true);
- return true;
+ return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF;
+}
- default:
- return false;
- }
+static bool type_is_integer(const struct hlsl_type *type)
+{
+ return type->e.numeric.type == HLSL_TYPE_BOOL
+ || type->e.numeric.type == HLSL_TYPE_INT
+ || type->e.numeric.type == HLSL_TYPE_UINT;
}
-static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
+static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program,
+ const struct hlsl_ir_expr *expr, uint32_t bits)
{
- struct hlsl_ir_node *instr, *next;
+ struct hlsl_ir_node *operand = expr->operands[0].node;
+ const struct hlsl_ir_node *instr = &expr->node;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct hlsl_constant_value value = {0};
+ struct vkd3d_shader_instruction *ins;
- LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
+ VKD3D_ASSERT(instr->reg.allocated);
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 2)))
+ return;
+
+ dst_param = &ins->dst[0];
+ vsir_dst_from_hlsl_node(dst_param, ctx, instr);
+
+ vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, dst_param->write_mask);
+
+ value.u[0].u = bits;
+ vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0);
+}
+
+static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_expr *expr)
+{
+ const struct hlsl_ir_node *arg1 = expr->operands[0].node;
+ const struct hlsl_type *dst_type = expr->node.data_type;
+ const struct hlsl_type *src_type = arg1->data_type;
+
+ static const union
{
- if (instr->data_type)
- {
- if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
+ uint32_t u;
+ float f;
+ } one = { .f = 1.0 };
+
+ /* Narrowing casts were already lowered. */
+ VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
+
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ switch (src_type->e.numeric.type)
{
- hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
- break;
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_BOOL:
+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u);
+ return true;
+
+ case HLSL_TYPE_DOUBLE:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
+ return false;
+
+ default:
+ vkd3d_unreachable();
}
- }
+ break;
- switch (instr->type)
- {
- case HLSL_IR_CALL:
- vkd3d_unreachable();
+ case HLSL_TYPE_INT:
+ switch (src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true);
+ return true;
- case HLSL_IR_CONSTANT:
- /* In SM4 all constants are inlined. */
- break;
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ return true;
- case HLSL_IR_EXPR:
- if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr)))
- replace_instr_with_last_vsir_instr(ctx, program, instr);
- break;
+ case HLSL_TYPE_BOOL:
+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
+ return true;
- case HLSL_IR_SWIZZLE:
- generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
- replace_instr_with_last_vsir_instr(ctx, program, instr);
- break;
+ case HLSL_TYPE_DOUBLE:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int.");
+ return false;
- default:
- break;
- }
+ default:
+ vkd3d_unreachable();
+ }
+ break;
+
+ case HLSL_TYPE_UINT:
+ switch (src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_HALF:
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_BOOL:
+ sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
+ return true;
+
+ case HLSL_TYPE_DOUBLE:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
+ return false;
+
+ default:
+ vkd3d_unreachable();
+ }
+ break;
+
+ case HLSL_TYPE_DOUBLE:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double.");
+ return false;
+
+ case HLSL_TYPE_BOOL:
+ /* Casts to bool should have already been lowered. */
+ default:
+ vkd3d_unreachable();
}
}
-static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
- struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
+static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program,
+ enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx)
{
- struct hlsl_block block = {0};
- struct hlsl_scope *scope;
- struct hlsl_ir_var *var;
- uint32_t temp_count;
+ struct vkd3d_shader_dst_param *dst_param, *null_param;
+ const struct hlsl_ir_node *instr = &expr->node;
+ struct vkd3d_shader_instruction *ins;
+ unsigned int i, src_count;
- compute_liveness(ctx, func);
- mark_indexable_vars(ctx, func);
- temp_count = allocate_temp_registers(ctx, func);
- if (ctx->result)
+ VKD3D_ASSERT(instr->reg.allocated);
+
+ for (i = 0; i < HLSL_MAX_OPERANDS; ++i)
+ {
+ if (expr->operands[i].node)
+ src_count = i + 1;
+ }
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count)))
return;
- program->temp_count = max(program->temp_count, temp_count);
- hlsl_block_init(&block);
+ dst_param = &ins->dst[dst_idx];
+ vsir_dst_from_hlsl_node(dst_param, ctx, instr);
+
+ null_param = &ins->dst[1 - dst_idx];
+ vsir_dst_param_init(null_param, VKD3DSPR_NULL, VKD3D_DATA_FLOAT, 0);
+ null_param->reg.dimension = VSIR_DIMENSION_NONE;
+
+ for (i = 0; i < src_count; ++i)
+ vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask);
+}
+
+static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx,
+ struct vsir_program *program, const struct hlsl_ir_expr *expr)
+{
+ struct hlsl_ir_node *operand = expr->operands[0].node;
+ const struct hlsl_ir_node *instr = &expr->node;
+ struct vkd3d_shader_dst_param *dst_param;
+ struct hlsl_constant_value value = {0};
+ struct vkd3d_shader_instruction *ins;
+
+ VKD3D_ASSERT(type_is_float(expr->node.data_type));
+
+ if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DIV, 1, 2)))
+ return;
+
+ dst_param = &ins->dst[0];
+ vsir_dst_from_hlsl_node(dst_param, ctx, instr);
+
+ value.u[0].f = 1.0f;
+ value.u[1].f = 1.0f;
+ value.u[2].f = 1.0f;
+ value.u[3].f = 1.0f;
+ vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value,
+ VKD3D_DATA_FLOAT, instr->data_type->dimx, dst_param->write_mask);
+
+ vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask);
+}
+
+static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
+ struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name)
+{
+ const struct hlsl_type *dst_type = expr->node.data_type;
+ const struct hlsl_type *src_type = NULL;
+
+ VKD3D_ASSERT(expr->node.reg.allocated);
+ if (expr->operands[0].node)
+ src_type = expr->operands[0].node->data_type;
+
+ switch (expr->op)
+ {
+ case HLSL_OP0_RASTERIZER_SAMPLE_COUNT:
+ sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr);
+ return true;
+
+ case HLSL_OP1_ABS:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true);
+ return true;
+
+ case HLSL_OP1_BIT_NOT:
+ VKD3D_ASSERT(type_is_integer(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_CAST:
+ return sm4_generate_vsir_instr_expr_cast(ctx, program, expr);
+
+ case HLSL_OP1_CEIL:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_COS:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1);
+ return true;
+
+ case HLSL_OP1_DSX:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_DSX_COARSE:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_DSX_FINE:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_DSY:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_DSY_COARSE:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_DSY_FINE:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_EXP2:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_F16TOF32:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_F32TOF16:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT);
+ VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_FLOOR:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_FRACT:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_LOG2:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_LOGIC_NOT:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_NEG:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP1_RCP:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ /* SM5 comes with a RCP opcode */
+ if (hlsl_version_ge(ctx, 5, 0))
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true);
+ else
+ sm4_generate_vsir_rcp_using_div(ctx, program, expr);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP1_REINTERPRET:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_ROUND:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_RSQ:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_SAT:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
+ return true;
+
+ case HLSL_OP1_SIN:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0);
+ return true;
+
+ case HLSL_OP1_SQRT:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true);
+ return true;
+
+ case HLSL_OP1_TRUNC:
+ VKD3D_ASSERT(type_is_float(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true);
+ return true;
+
+ case HLSL_OP2_ADD:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_BIT_AND:
+ VKD3D_ASSERT(type_is_integer(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true);
+ return true;
+
+ case HLSL_OP2_BIT_OR:
+ VKD3D_ASSERT(type_is_integer(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true);
+ return true;
+
+ case HLSL_OP2_BIT_XOR:
+ VKD3D_ASSERT(type_is_integer(dst_type));
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true);
+ return true;
+
+ case HLSL_OP2_DIV:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_UINT:
+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_DOT:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ switch (expr->operands[0].node->data_type->dimx)
+ {
+ case 4:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
+ return true;
+
+ case 3:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
+ return true;
+
+ case 2:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false);
+ return true;
+
+ case 1:
+ default:
+ vkd3d_unreachable();
+ }
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_EQUAL:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
+
+ switch (src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_BOOL:
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
+ debug_hlsl_type(ctx, src_type));
+ return false;
+ }
+
+ case HLSL_OP2_GEQUAL:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
+
+ switch (src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_BOOL:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
+ debug_hlsl_type(ctx, src_type));
+ return false;
+ }
+
+ case HLSL_OP2_LESS:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
+
+ switch (src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ILT, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_BOOL:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
+ debug_hlsl_type(ctx, src_type));
+ return false;
+ }
+
+ case HLSL_OP2_LOGIC_AND:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true);
+ return true;
+
+ case HLSL_OP2_LOGIC_OR:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true);
+ return true;
+
+ case HLSL_OP2_LSHIFT:
+ VKD3D_ASSERT(type_is_integer(dst_type));
+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true);
+ return true;
+
+ case HLSL_OP3_MAD:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_MAX:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_MIN:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_MOD:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_UINT:
+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_MUL:
+ switch (dst_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ /* Using IMUL instead of UMUL because we're taking the low
+ * bits, and the native compiler generates IMUL. */
+ sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name);
+ return false;
+ }
+
+ case HLSL_OP2_NEQUAL:
+ VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
+
+ switch (src_type->e.numeric.type)
+ {
+ case HLSL_TYPE_FLOAT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true);
+ return true;
+
+ case HLSL_TYPE_BOOL:
+ case HLSL_TYPE_INT:
+ case HLSL_TYPE_UINT:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
+ debug_hlsl_type(ctx, src_type));
+ return false;
+ }
+
+ case HLSL_OP2_RSHIFT:
+ VKD3D_ASSERT(type_is_integer(dst_type));
+ VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr,
+ dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true);
+ return true;
+
+ case HLSL_OP3_TERNARY:
+ generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true);
+ return true;
+
+ default:
+ hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
+ return false;
+ }
+}
+
+static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
+{
+ struct vkd3d_string_buffer *dst_type_string;
+ struct hlsl_ir_node *instr, *next;
+ struct hlsl_ir_switch_case *c;
+
+ LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
+ {
+ if (instr->data_type)
+ {
+ if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
+ {
+ hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
+ break;
+ }
+ }
+
+ switch (instr->type)
+ {
+ case HLSL_IR_CALL:
+ vkd3d_unreachable();
+
+ case HLSL_IR_CONSTANT:
+ /* In SM4 all constants are inlined. */
+ break;
+
+ case HLSL_IR_EXPR:
+ if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type)))
+ break;
+
+ if (sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer))
+ replace_instr_with_last_vsir_instr(ctx, program, instr);
+
+ hlsl_release_string_buffer(ctx, dst_type_string);
+ break;
+
+ case HLSL_IR_IF:
+ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->then_block, program);
+ sm4_generate_vsir_block(ctx, &hlsl_ir_if(instr)->else_block, program);
+ break;
+
+ case HLSL_IR_LOOP:
+ sm4_generate_vsir_block(ctx, &hlsl_ir_loop(instr)->body, program);
+ break;
+
+ case HLSL_IR_SWITCH:
+ LIST_FOR_EACH_ENTRY(c, &hlsl_ir_switch(instr)->cases, struct hlsl_ir_switch_case, entry)
+ sm4_generate_vsir_block(ctx, &c->body, program);
+ break;
+
+ case HLSL_IR_SWIZZLE:
+ generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
+ replace_instr_with_last_vsir_instr(ctx, program, instr);
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
+ struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
+{
+ bool is_patch_constant_func = func == ctx->patch_constant_func;
+ struct hlsl_block block = {0};
+ struct hlsl_scope *scope;
+ struct hlsl_ir_var *var;
+ uint32_t temp_count;
+
+ compute_liveness(ctx, func);
+ mark_indexable_vars(ctx, func);
+ temp_count = allocate_temp_registers(ctx, func);
+ if (ctx->result)
+ return;
+ program->temp_count = max(program->temp_count, temp_count);
+
+ hlsl_block_init(&block);
+
+ LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
+ {
+ if ((var->is_input_semantic && var->last_read)
+ || (var->is_output_semantic && var->first_write))
+ sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, is_patch_constant_func, &block, &var->loc);
+ }
if (temp_count)
sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc);
diff --git a/libs/vkd3d/libs/vkd3d-shader/ir.c b/libs/vkd3d/libs/vkd3d-shader/ir.c
index 9b50a308e11..836e0ade32a 100644
--- a/libs/vkd3d/libs/vkd3d-shader/ir.c
+++ b/libs/vkd3d/libs/vkd3d-shader/ir.c
@@ -1932,6 +1932,8 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
const struct shader_signature *signature;
const struct signature_element *e;
+ write_mask = dst_param->write_mask;
+
switch (reg->type)
{
case VKD3DSPR_OUTPUT:
@@ -1987,6 +1989,10 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
signature = normaliser->output_signature;
reg->type = VKD3DSPR_OUTPUT;
dcl_params = normaliser->output_dcl_params;
+ /* Fog and point size are scalar, but fxc/d3dcompiler emits a full
+ * write mask when writing to them. */
+ if (reg->idx[0].offset > 0)
+ write_mask = VKD3DSP_WRITEMASK_0;
break;
default:
@@ -1994,7 +2000,6 @@ static bool shader_dst_param_io_normalise(struct vkd3d_shader_dst_param *dst_par
}
id_idx = reg->idx_count - 1;
- write_mask = dst_param->write_mask;
if (!shader_signature_find_element_for_reg(signature, reg_idx, write_mask, &element_idx))
vkd3d_unreachable();
e = &signature->elements[element_idx];
@@ -6214,6 +6219,14 @@ static enum vkd3d_result vsir_program_insert_clip_planes(struct vsir_program *pr
return VKD3D_OK;
}
+static bool is_pre_rasterization_shader(enum vkd3d_shader_type type)
+{
+ return type == VKD3D_SHADER_TYPE_VERTEX
+ || type == VKD3D_SHADER_TYPE_HULL
+ || type == VKD3D_SHADER_TYPE_DOMAIN
+ || type == VKD3D_SHADER_TYPE_GEOMETRY;
+}
+
static enum vkd3d_result insert_point_size_before_ret(struct vsir_program *program,
const struct vkd3d_shader_instruction *ret, size_t *ret_pos)
{
@@ -6244,10 +6257,7 @@ static enum vkd3d_result vsir_program_insert_point_size(struct vsir_program *pro
if (program->has_point_size)
return VKD3D_OK;
- if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX
- && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY
- && program->shader_version.type != VKD3D_SHADER_TYPE_HULL
- && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN)
+ if (!is_pre_rasterization_shader(program->shader_version.type))
return VKD3D_OK;
for (unsigned int i = 0; i < program->parameter_count; ++i)
@@ -6298,10 +6308,7 @@ static enum vkd3d_result vsir_program_insert_point_size_clamp(struct vsir_progra
if (!program->has_point_size)
return VKD3D_OK;
- if (program->shader_version.type != VKD3D_SHADER_TYPE_VERTEX
- && program->shader_version.type != VKD3D_SHADER_TYPE_GEOMETRY
- && program->shader_version.type != VKD3D_SHADER_TYPE_HULL
- && program->shader_version.type != VKD3D_SHADER_TYPE_DOMAIN)
+ if (!is_pre_rasterization_shader(program->shader_version.type))
return VKD3D_OK;
for (unsigned int i = 0; i < program->parameter_count; ++i)
diff --git a/libs/vkd3d/libs/vkd3d-shader/spirv.c b/libs/vkd3d/libs/vkd3d-shader/spirv.c
index fb7ce063c85..3fa4d68a48a 100644
--- a/libs/vkd3d/libs/vkd3d-shader/spirv.c
+++ b/libs/vkd3d/libs/vkd3d-shader/spirv.c
@@ -8904,15 +8904,20 @@ static void spirv_compiler_emit_ld_raw_structured_srv_uav(struct spirv_compiler
uint32_t base_coordinate_id, component_idx;
uint32_t constituents[VKD3D_VEC4_SIZE];
struct vkd3d_shader_image image;
+ bool storage_buffer_uav = false;
uint32_t indices[2];
unsigned int i, j;
SpvOp op;
resource = &src[instruction->src_count - 1];
- resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg);
- if (resource->reg.type == VKD3DSPR_UAV
- && spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource))
+ if (resource->reg.type == VKD3DSPR_UAV)
+ {
+ resource_symbol = spirv_compiler_find_resource(compiler, &resource->reg);
+ storage_buffer_uav = spirv_compiler_use_storage_buffer(compiler, &resource_symbol->info.resource);
+ }
+
+ if (storage_buffer_uav)
{
texel_type_id = vkd3d_spirv_get_type_id(builder, resource_symbol->info.resource.sampled_type, 1);
ptr_type_id = vkd3d_spirv_get_op_type_pointer(builder, SpvStorageClassUniform, texel_type_id);
diff --git a/libs/vkd3d/libs/vkd3d-shader/tpf.c b/libs/vkd3d/libs/vkd3d-shader/tpf.c
index bbd2f761d29..9c41e2c2053 100644
--- a/libs/vkd3d/libs/vkd3d-shader/tpf.c
+++ b/libs/vkd3d/libs/vkd3d-shader/tpf.c
@@ -1719,7 +1719,7 @@ static void init_sm4_lookup_tables(struct vkd3d_sm4_lookup_tables *lookup)
{VKD3D_SM4_RT_PRIMID, VKD3DSPR_PRIMID, VKD3D_SM4_SWIZZLE_NONE},
{VKD3D_SM4_RT_DEPTHOUT, VKD3DSPR_DEPTHOUT, VKD3D_SM4_SWIZZLE_VEC4},
{VKD3D_SM4_RT_NULL, VKD3DSPR_NULL, VKD3D_SM4_SWIZZLE_INVALID},
- {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_VEC4},
+ {VKD3D_SM4_RT_RASTERIZER, VKD3DSPR_RASTERIZER, VKD3D_SM4_SWIZZLE_SCALAR},
{VKD3D_SM4_RT_OMASK, VKD3DSPR_SAMPLEMASK, VKD3D_SM4_SWIZZLE_VEC4},
{VKD3D_SM5_RT_STREAM, VKD3DSPR_STREAM, VKD3D_SM4_SWIZZLE_VEC4},
{VKD3D_SM5_RT_FUNCTION_BODY, VKD3DSPR_FUNCTIONBODY, VKD3D_SM4_SWIZZLE_VEC4},
@@ -2235,7 +2235,7 @@ static bool shader_sm4_read_param(struct vkd3d_shader_sm4_parser *priv, const ui
return true;
}
-static bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg)
+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg)
{
switch (reg->type)
{
@@ -2995,20 +2995,6 @@ int tpf_parse(const struct vkd3d_shader_compile_info *compile_info, uint64_t con
static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *block);
-static bool type_is_integer(const struct hlsl_type *type)
-{
- switch (type->e.numeric.type)
- {
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- return true;
-
- default:
- return false;
- }
-}
-
bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version,
const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx)
{
@@ -4845,7 +4831,15 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct
}
else
{
- instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE;
+ switch (component_type->sampler_dim)
+ {
+ case HLSL_SAMPLER_DIM_RAW_BUFFER:
+ instr.opcode = VKD3D_SM5_OP_DCL_RESOURCE_RAW;
+ break;
+ default:
+ instr.opcode = VKD3D_SM4_OP_DCL_RESOURCE;
+ break;
+ }
}
instr.extra_bits |= (sm4_resource_dimension(component_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT);
@@ -4856,135 +4850,62 @@ static void write_sm4_dcl_textures(const struct tpf_compiler *tpf, const struct
}
}
-static void tpf_write_dcl_semantic(const struct tpf_compiler *tpf,
- const struct hlsl_ir_var *var, bool is_patch_constant_func)
+static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count)
{
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
- const bool output = var->is_output_semantic;
- enum vkd3d_shader_sysval_semantic semantic;
- bool has_idx;
-
struct sm4_instruction instr =
{
- .dsts[0].reg.dimension = VSIR_DIMENSION_VEC4,
- .dst_count = 1,
- };
-
- if (sm4_register_from_semantic_name(version, var->semantic.name, output, &instr.dsts[0].reg.type, &has_idx))
- {
- if (has_idx)
- {
- instr.dsts[0].reg.idx[0].offset = var->semantic.index;
- instr.dsts[0].reg.idx_count = 1;
- }
- else
- {
- instr.dsts[0].reg.idx_count = 0;
- }
- instr.dsts[0].write_mask = (1 << var->data_type->dimx) - 1;
- }
- else
- {
- if (output)
- instr.dsts[0].reg.type = VKD3DSPR_OUTPUT;
- else if (version->type == VKD3D_SHADER_TYPE_DOMAIN)
- instr.dsts[0].reg.type = VKD3DSPR_PATCHCONST;
- else
- instr.dsts[0].reg.type = VKD3DSPR_INPUT;
-
- instr.dsts[0].reg.idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
- instr.dsts[0].reg.idx_count = 1;
- instr.dsts[0].write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
- }
-
- if (shader_sm4_is_scalar_register(&instr.dsts[0].reg))
- instr.dsts[0].reg.dimension = VSIR_DIMENSION_SCALAR;
-
- sm4_sysval_semantic_from_semantic_name(&semantic, version, tpf->ctx->semantic_compat_mapping,
- tpf->ctx->domain, var->semantic.name, var->semantic.index, output, is_patch_constant_func);
- if (semantic == ~0u)
- semantic = VKD3D_SHADER_SV_NONE;
-
- if (var->is_input_semantic)
- {
- switch (semantic)
- {
- case VKD3D_SHADER_SV_NONE:
- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
- ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT;
- break;
-
- case VKD3D_SHADER_SV_INSTANCE_ID:
- case VKD3D_SHADER_SV_IS_FRONT_FACE:
- case VKD3D_SHADER_SV_PRIMITIVE_ID:
- case VKD3D_SHADER_SV_SAMPLE_INDEX:
- case VKD3D_SHADER_SV_VERTEX_ID:
- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
- ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV;
- break;
+ .opcode = VKD3D_SM4_OP_DCL_TEMPS,
- default:
- instr.opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
- ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV;
- break;
- }
+ .idx = {count},
+ .idx_count = 1,
+ };
- if (version->type == VKD3D_SHADER_TYPE_PIXEL)
- {
- enum vkd3d_shader_interpolation_mode mode;
+ write_sm4_instruction(tpf, &instr);
+}
- mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
- instr.extra_bits |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
- }
- }
- else
+static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp)
+{
+ struct sm4_instruction instr =
{
- if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL)
- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT;
- else
- instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV;
- }
+ .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP,
- if (instr.opcode == VKD3D_SM4_OP_DCL_OUTPUT)
- {
- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET
- || instr.dsts[0].reg.type != VKD3DSPR_OUTPUT);
- }
- else if (instr.opcode == VKD3D_SM4_OP_DCL_INPUT || instr.opcode == VKD3D_SM4_OP_DCL_INPUT_PS)
- {
- VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE);
- }
- else
- {
- VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE);
- instr.idx_count = 1;
- instr.idx[0] = vkd3d_siv_from_sysval_indexed(semantic, var->semantic.index);
- }
+ .idx = {temp->register_idx, temp->register_size, temp->component_count},
+ .idx_count = 3,
+ };
write_sm4_instruction(tpf, &instr);
}
-static void tpf_dcl_temps(const struct tpf_compiler *tpf, unsigned int count)
+static void tpf_dcl_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
+ const struct vkd3d_shader_dst_param *dst, uint32_t interpolation_flags)
{
struct sm4_instruction instr =
{
- .opcode = VKD3D_SM4_OP_DCL_TEMPS,
+ .opcode = opcode,
- .idx = {count},
- .idx_count = 1,
+ .dsts[0] = *dst,
+ .dst_count = 1,
+
+ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
};
write_sm4_instruction(tpf, &instr);
}
-static void tpf_dcl_indexable_temp(const struct tpf_compiler *tpf, const struct vkd3d_shader_indexable_temp *temp)
+static void tpf_dcl_siv_semantic(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
+ const struct vkd3d_shader_register_semantic *semantic, uint32_t interpolation_flags)
{
struct sm4_instruction instr =
{
- .opcode = VKD3D_SM4_OP_DCL_INDEXABLE_TEMP,
+ .opcode = opcode,
- .idx = {temp->register_idx, temp->register_size, temp->component_count},
- .idx_count = 3,
+ .dsts[0] = semantic->reg,
+ .dst_count = 1,
+
+ .idx[0] = semantic->sysval_semantic,
+ .idx_count = 1,
+
+ .extra_bits = interpolation_flags << VKD3D_SM4_INTERPOLATION_MODE_SHIFT,
};
write_sm4_instruction(tpf, &instr);
@@ -5111,125 +5032,6 @@ static void write_sm4_ret(const struct tpf_compiler *tpf)
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_unary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, enum vkd3d_shader_src_modifier src_mod)
-{
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = opcode;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[0].write_mask);
- instr.srcs[0].modifiers = src_mod;
- instr.src_count = 1;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_unary_op_with_two_destinations(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
- const struct hlsl_ir_node *dst, unsigned int dst_idx, const struct hlsl_ir_node *src)
-{
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = opcode;
-
- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts));
- sm4_dst_from_node(&instr.dsts[dst_idx], dst);
- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL;
- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE;
- instr.dsts[1 - dst_idx].reg.idx_count = 0;
- instr.dst_count = 2;
-
- sm4_src_from_node(tpf, &instr.srcs[0], src, instr.dsts[dst_idx].write_mask);
- instr.src_count = 1;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_binary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
-{
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = opcode;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask);
- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask);
- instr.src_count = 2;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-/* dp# instructions don't map the swizzle. */
-static void write_sm4_binary_op_dot(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
-{
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = opcode;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL);
- sm4_src_from_node(tpf, &instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL);
- instr.src_count = 2;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_binary_op_with_two_destinations(const struct tpf_compiler *tpf,
- enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx,
- const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
-{
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = opcode;
-
- VKD3D_ASSERT(dst_idx < ARRAY_SIZE(instr.dsts));
- sm4_dst_from_node(&instr.dsts[dst_idx], dst);
- instr.dsts[1 - dst_idx].reg.type = VKD3DSPR_NULL;
- instr.dsts[1 - dst_idx].reg.dimension = VSIR_DIMENSION_NONE;
- instr.dsts[1 - dst_idx].reg.idx_count = 0;
- instr.dst_count = 2;
-
- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[dst_idx].write_mask);
- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[dst_idx].write_mask);
- instr.src_count = 2;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_ternary_op(const struct tpf_compiler *tpf, enum vkd3d_sm4_opcode opcode,
- const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2,
- const struct hlsl_ir_node *src3)
-{
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = opcode;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- sm4_src_from_node(tpf, &instr.srcs[0], src1, instr.dsts[0].write_mask);
- sm4_src_from_node(tpf, &instr.srcs[1], src2, instr.dsts[0].write_mask);
- sm4_src_from_node(tpf, &instr.srcs[2], src3, instr.dsts[0].write_mask);
- instr.src_count = 3;
-
- write_sm4_instruction(tpf, &instr);
-}
-
static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst,
const struct hlsl_deref *resource, const struct hlsl_ir_node *coords,
const struct hlsl_ir_node *sample_index, const struct hlsl_ir_node *texel_offset,
@@ -5240,12 +5042,15 @@ static void write_sm4_ld(const struct tpf_compiler *tpf, const struct hlsl_ir_no
&& (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY);
bool uav = (hlsl_deref_get_regset(tpf->ctx, resource) == HLSL_REGSET_UAVS);
const struct vkd3d_shader_version *version = &tpf->program->shader_version;
+ bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER;
unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL;
struct sm4_instruction instr;
memset(&instr, 0, sizeof(instr));
if (uav)
instr.opcode = VKD3D_SM5_OP_LD_UAV_TYPED;
+ else if (raw)
+ instr.opcode = VKD3D_SM5_OP_LD_RAW;
else
instr.opcode = multisampled ? VKD3D_SM4_OP_LD2DMS : VKD3D_SM4_OP_LD;
@@ -5441,742 +5246,67 @@ static void write_sm4_resinfo(const struct tpf_compiler *tpf, const struct hlsl_
write_sm4_instruction(tpf, &instr);
}
-static bool type_is_float(const struct hlsl_type *type)
+static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff)
{
- return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF;
-}
+ struct sm4_instruction instr =
+ {
+ .opcode = VKD3D_SM4_OP_IF,
+ .extra_bits = VKD3D_SM4_CONDITIONAL_NZ,
+ .src_count = 1,
+ };
-static void write_sm4_cast_from_bool(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr,
- const struct hlsl_ir_node *arg, uint32_t mask)
-{
- struct sm4_instruction instr;
+ VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1);
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_AND;
+ sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL);
+ write_sm4_instruction(tpf, &instr);
- sm4_dst_from_node(&instr.dsts[0], &expr->node);
- instr.dst_count = 1;
+ write_sm4_block(tpf, &iff->then_block);
- sm4_src_from_node(tpf, &instr.srcs[0], arg, instr.dsts[0].write_mask);
- instr.srcs[1].reg.type = VKD3DSPR_IMMCONST;
- instr.srcs[1].reg.dimension = VSIR_DIMENSION_SCALAR;
- instr.srcs[1].reg.u.immconst_u32[0] = mask;
- instr.src_count = 2;
+ if (!list_empty(&iff->else_block.instrs))
+ {
+ instr.opcode = VKD3D_SM4_OP_ELSE;
+ instr.src_count = 0;
+ write_sm4_instruction(tpf, &instr);
+
+ write_sm4_block(tpf, &iff->else_block);
+ }
+ instr.opcode = VKD3D_SM4_OP_ENDIF;
+ instr.src_count = 0;
write_sm4_instruction(tpf, &instr);
}
-static void write_sm4_cast(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr)
+static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump)
{
- static const union
- {
- uint32_t u;
- float f;
- } one = { .f = 1.0 };
- const struct hlsl_ir_node *arg1 = expr->operands[0].node;
- const struct hlsl_type *dst_type = expr->node.data_type;
- const struct hlsl_type *src_type = arg1->data_type;
-
- /* Narrowing casts were already lowered. */
- VKD3D_ASSERT(src_type->dimx == dst_type->dimx);
+ struct sm4_instruction instr = {0};
- switch (dst_type->e.numeric.type)
+ switch (jump->type)
{
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
- break;
-
- case HLSL_TYPE_INT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0);
- break;
-
- case HLSL_TYPE_UINT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0);
- break;
-
- case HLSL_TYPE_BOOL:
- write_sm4_cast_from_bool(tpf, expr, arg1, one.u);
- break;
-
- case HLSL_TYPE_DOUBLE:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to float.");
- break;
-
- default:
- vkd3d_unreachable();
- }
+ case HLSL_IR_JUMP_BREAK:
+ instr.opcode = VKD3D_SM4_OP_BREAK;
break;
- case HLSL_TYPE_INT:
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0);
- break;
-
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
- break;
-
- case HLSL_TYPE_BOOL:
- write_sm4_cast_from_bool(tpf, expr, arg1, 1);
- break;
-
- case HLSL_TYPE_DOUBLE:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to int.");
- break;
-
- default:
- vkd3d_unreachable();
- }
+ case HLSL_IR_JUMP_CONTINUE:
+ instr.opcode = VKD3D_SM4_OP_CONTINUE;
break;
- case HLSL_TYPE_UINT:
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_HALF:
- case HLSL_TYPE_FLOAT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0);
- break;
-
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
- break;
-
- case HLSL_TYPE_BOOL:
- write_sm4_cast_from_bool(tpf, expr, arg1, 1);
- break;
-
- case HLSL_TYPE_DOUBLE:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast from double to uint.");
- break;
+ case HLSL_IR_JUMP_DISCARD_NZ:
+ {
+ instr.opcode = VKD3D_SM4_OP_DISCARD;
+ instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
- default:
- vkd3d_unreachable();
- }
+ memset(&instr.srcs[0], 0, sizeof(*instr.srcs));
+ instr.src_count = 1;
+ sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL);
break;
+ }
- case HLSL_TYPE_DOUBLE:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 cast to double.");
- break;
+ case HLSL_IR_JUMP_RETURN:
+ vkd3d_unreachable();
- case HLSL_TYPE_BOOL:
- /* Casts to bool should have already been lowered. */
default:
- vkd3d_unreachable();
- }
-}
-
-static void write_sm4_rasterizer_sample_count(const struct tpf_compiler *tpf, const struct hlsl_ir_node *dst)
-{
- struct sm4_instruction instr;
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_SAMPLE_INFO;
- instr.extra_bits |= VKD3DSI_SAMPLE_INFO_UINT << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
-
- sm4_dst_from_node(&instr.dsts[0], dst);
- instr.dst_count = 1;
-
- instr.srcs[0].reg.type = VKD3DSPR_RASTERIZER;
- instr.srcs[0].reg.dimension = VSIR_DIMENSION_VEC4;
- instr.srcs[0].swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
- instr.src_count = 1;
-
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_expr(const struct tpf_compiler *tpf, const struct hlsl_ir_expr *expr)
-{
- const struct vkd3d_shader_version *version = &tpf->program->shader_version;
- const struct hlsl_ir_node *arg1 = expr->operands[0].node;
- const struct hlsl_ir_node *arg2 = expr->operands[1].node;
- const struct hlsl_ir_node *arg3 = expr->operands[2].node;
- const struct hlsl_type *dst_type = expr->node.data_type;
- struct vkd3d_string_buffer *dst_type_string;
-
- VKD3D_ASSERT(expr->node.reg.allocated);
-
- if (!(dst_type_string = hlsl_type_to_string(tpf->ctx, dst_type)))
- return;
-
- switch (expr->op)
- {
- case HLSL_OP0_RASTERIZER_SAMPLE_COUNT:
- if (version->type == VKD3D_SHADER_TYPE_PIXEL && vkd3d_shader_ver_ge(version, 4, 1))
- write_sm4_rasterizer_sample_count(tpf, &expr->node);
- else
- hlsl_error(tpf->ctx, &expr->node.loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
- "GetRenderTargetSampleCount() can only be used from a pixel shader using version 4.1 or higher.");
- break;
-
- case HLSL_OP1_ABS:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_ABS);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP1_BIT_NOT:
- VKD3D_ASSERT(type_is_integer(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_CAST:
- write_sm4_cast(tpf, expr);
- break;
-
- case HLSL_OP1_CEIL:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_PI, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_COS:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1);
- break;
-
- case HLSL_OP1_DSX:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTX, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_DSX_COARSE:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_COARSE, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_DSX_FINE:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTX_FINE, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_DSY:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_DERIV_RTY, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_DSY_COARSE:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_COARSE, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_DSY_FINE:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM5_OP_DERIV_RTY_FINE, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_EXP2:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_F16TOF32:
- VKD3D_ASSERT(type_is_float(dst_type));
- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0));
- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F16TOF32, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_F32TOF16:
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT);
- VKD3D_ASSERT(hlsl_version_ge(tpf->ctx, 5, 0));
- write_sm4_unary_op(tpf, VKD3D_SM5_OP_F32TOF16, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_FLOOR:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_FRACT:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_LOG2:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_LOGIC_NOT:
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_NEG:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3DSPSM_NEG);
- break;
-
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP1_RCP:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- /* SM5 comes with a RCP opcode */
- if (vkd3d_shader_ver_ge(version, 5, 0))
- {
- write_sm4_unary_op(tpf, VKD3D_SM5_OP_RCP, &expr->node, arg1, 0);
- }
- else
- {
- /* For SM4, implement as DIV dst, 1.0, src */
- struct sm4_instruction instr;
- struct hlsl_constant_value one;
-
- VKD3D_ASSERT(type_is_float(dst_type));
-
- memset(&instr, 0, sizeof(instr));
- instr.opcode = VKD3D_SM4_OP_DIV;
-
- sm4_dst_from_node(&instr.dsts[0], &expr->node);
- instr.dst_count = 1;
-
- for (unsigned int i = 0; i < 4; i++)
- one.u[i].f = 1.0f;
- sm4_src_from_constant_value(&instr.srcs[0], &one, dst_type->dimx, instr.dsts[0].write_mask);
- sm4_src_from_node(tpf, &instr.srcs[1], arg1, instr.dsts[0].write_mask);
- instr.src_count = 2;
-
- write_sm4_instruction(tpf, &instr);
- }
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP1_REINTERPRET:
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_ROUND:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_RSQ:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_SAT:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_MOV
- | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT),
- &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_SIN:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op_with_two_destinations(tpf, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1);
- break;
-
- case HLSL_OP1_SQRT:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP1_TRUNC:
- VKD3D_ASSERT(type_is_float(dst_type));
- write_sm4_unary_op(tpf, VKD3D_SM4_OP_ROUND_Z, &expr->node, arg1, 0);
- break;
-
- case HLSL_OP2_ADD:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP2_BIT_AND:
- VKD3D_ASSERT(type_is_integer(dst_type));
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
- break;
-
- case HLSL_OP2_BIT_OR:
- VKD3D_ASSERT(type_is_integer(dst_type));
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
- break;
-
- case HLSL_OP2_BIT_XOR:
- VKD3D_ASSERT(type_is_integer(dst_type));
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2);
- break;
-
- case HLSL_OP2_DIV:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_UINT:
- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP2_DOT:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- switch (arg1->data_type->dimx)
- {
- case 4:
- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2);
- break;
-
- case 3:
- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2);
- break;
-
- case 2:
- write_sm4_binary_op_dot(tpf, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2);
- break;
-
- case 1:
- default:
- vkd3d_unreachable();
- }
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP2_EQUAL:
- {
- const struct hlsl_type *src_type = arg1->data_type;
-
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
- debug_hlsl_type(tpf->ctx, src_type));
- break;
- }
- break;
- }
-
- case HLSL_OP2_GEQUAL:
- {
- const struct hlsl_type *src_type = arg1->data_type;
-
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_INT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_UINT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
- debug_hlsl_type(tpf->ctx, src_type));
- break;
- }
- break;
- }
-
- case HLSL_OP2_LESS:
- {
- const struct hlsl_type *src_type = arg1->data_type;
-
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_INT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_UINT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
- debug_hlsl_type(tpf->ctx, src_type));
- break;
- }
- break;
- }
-
- case HLSL_OP2_LOGIC_AND:
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
- break;
-
- case HLSL_OP2_LOGIC_OR:
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
- break;
-
- case HLSL_OP2_LSHIFT:
- VKD3D_ASSERT(type_is_integer(dst_type));
- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2);
- break;
-
- case HLSL_OP2_MAX:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_INT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_UINT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP2_MIN:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_INT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_UINT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP2_MOD:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_UINT:
- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP2_MUL:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- /* Using IMUL instead of UMUL because we're taking the low
- * bits, and the native compiler generates IMUL. */
- write_sm4_binary_op_with_two_destinations(tpf, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer);
- }
- break;
-
- case HLSL_OP2_NEQUAL:
- {
- const struct hlsl_type *src_type = arg1->data_type;
-
- VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
-
- switch (src_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2);
- break;
-
- case HLSL_TYPE_BOOL:
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- write_sm4_binary_op(tpf, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
- debug_hlsl_type(tpf->ctx, src_type));
- break;
- }
- break;
- }
-
- case HLSL_OP2_RSHIFT:
- VKD3D_ASSERT(type_is_integer(dst_type));
- VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
- write_sm4_binary_op(tpf, dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR,
- &expr->node, arg1, arg2);
- break;
-
- case HLSL_OP3_TERNARY:
- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MOVC, &expr->node, arg1, arg2, arg3);
- break;
-
- case HLSL_OP3_MAD:
- switch (dst_type->e.numeric.type)
- {
- case HLSL_TYPE_FLOAT:
- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_MAD, &expr->node, arg1, arg2, arg3);
- break;
-
- case HLSL_TYPE_INT:
- case HLSL_TYPE_UINT:
- write_sm4_ternary_op(tpf, VKD3D_SM4_OP_IMAD, &expr->node, arg1, arg2, arg3);
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer);
- }
- break;
-
- default:
- hlsl_fixme(tpf->ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
- }
-
- hlsl_release_string_buffer(tpf->ctx, dst_type_string);
-}
-
-static void write_sm4_if(struct tpf_compiler *tpf, const struct hlsl_ir_if *iff)
-{
- struct sm4_instruction instr =
- {
- .opcode = VKD3D_SM4_OP_IF,
- .extra_bits = VKD3D_SM4_CONDITIONAL_NZ,
- .src_count = 1,
- };
-
- VKD3D_ASSERT(iff->condition.node->data_type->dimx == 1);
-
- sm4_src_from_node(tpf, &instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL);
- write_sm4_instruction(tpf, &instr);
-
- write_sm4_block(tpf, &iff->then_block);
-
- if (!list_empty(&iff->else_block.instrs))
- {
- instr.opcode = VKD3D_SM4_OP_ELSE;
- instr.src_count = 0;
- write_sm4_instruction(tpf, &instr);
-
- write_sm4_block(tpf, &iff->else_block);
- }
-
- instr.opcode = VKD3D_SM4_OP_ENDIF;
- instr.src_count = 0;
- write_sm4_instruction(tpf, &instr);
-}
-
-static void write_sm4_jump(const struct tpf_compiler *tpf, const struct hlsl_ir_jump *jump)
-{
- struct sm4_instruction instr = {0};
-
- switch (jump->type)
- {
- case HLSL_IR_JUMP_BREAK:
- instr.opcode = VKD3D_SM4_OP_BREAK;
- break;
-
- case HLSL_IR_JUMP_CONTINUE:
- instr.opcode = VKD3D_SM4_OP_CONTINUE;
- break;
-
- case HLSL_IR_JUMP_DISCARD_NZ:
- {
- instr.opcode = VKD3D_SM4_OP_DISCARD;
- instr.extra_bits = VKD3D_SM4_CONDITIONAL_NZ;
-
- memset(&instr.srcs[0], 0, sizeof(*instr.srcs));
- instr.src_count = 1;
- sm4_src_from_node(tpf, &instr.srcs[0], jump->condition.node, VKD3DSP_WRITEMASK_ALL);
- break;
- }
-
- case HLSL_IR_JUMP_RETURN:
- vkd3d_unreachable();
-
- default:
- hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
- return;
+ hlsl_fixme(tpf->ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
+ return;
}
write_sm4_instruction(tpf, &instr);
@@ -6506,11 +5636,22 @@ static void tpf_simple_instruction(struct tpf_compiler *tpf, const struct vkd3d_
}
instr.opcode = info->opcode;
+ instr.extra_bits = ins->flags << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
instr.dst_count = ins->dst_count;
instr.src_count = ins->src_count;
for (unsigned int i = 0; i < ins->dst_count; ++i)
+ {
instr.dsts[i] = ins->dst[i];
+
+ if (instr.dsts[i].modifiers & VKD3DSPDM_SATURATE)
+ {
+ /* For vsir SATURATE is a dst modifier, while for tpf it is an instruction flag. */
+ VKD3D_ASSERT(ins->dst_count == 1);
+ instr.dsts[i].modifiers &= ~VKD3DSPDM_SATURATE;
+ instr.extra_bits |= VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT;
+ }
+ }
for (unsigned int i = 0; i < ins->src_count; ++i)
instr.srcs[i] = ins->src[i];
@@ -6529,7 +5670,99 @@ static void tpf_handle_instruction(struct tpf_compiler *tpf, const struct vkd3d_
tpf_dcl_indexable_temp(tpf, &ins->declaration.indexable_temp);
break;
+ case VKD3DSIH_DCL_INPUT:
+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT, &ins->declaration.dst, 0);
+ break;
+
+ case VKD3DSIH_DCL_INPUT_PS:
+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS, &ins->declaration.dst, ins->flags);
+ break;
+
+ case VKD3DSIH_DCL_INPUT_PS_SGV:
+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SGV, &ins->declaration.register_semantic, 0);
+ break;
+
+ case VKD3DSIH_DCL_INPUT_PS_SIV:
+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_PS_SIV, &ins->declaration.register_semantic, ins->flags);
+ break;
+
+ case VKD3DSIH_DCL_INPUT_SGV:
+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SGV, &ins->declaration.register_semantic, 0);
+ break;
+
+ case VKD3DSIH_DCL_INPUT_SIV:
+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_INPUT_SIV, &ins->declaration.register_semantic, 0);
+ break;
+
+ case VKD3DSIH_DCL_OUTPUT:
+ tpf_dcl_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT, &ins->declaration.dst, 0);
+ break;
+
+ case VKD3DSIH_DCL_OUTPUT_SIV:
+ tpf_dcl_siv_semantic(tpf, VKD3D_SM4_OP_DCL_OUTPUT_SIV, &ins->declaration.register_semantic, 0);
+ break;
+
+ case VKD3DSIH_ADD:
+ case VKD3DSIH_AND:
+ case VKD3DSIH_DIV:
+ case VKD3DSIH_DP2:
+ case VKD3DSIH_DP3:
+ case VKD3DSIH_DP4:
+ case VKD3DSIH_DSX:
+ case VKD3DSIH_DSX_COARSE:
+ case VKD3DSIH_DSX_FINE:
+ case VKD3DSIH_DSY:
+ case VKD3DSIH_DSY_COARSE:
+ case VKD3DSIH_DSY_FINE:
+ case VKD3DSIH_EQO:
+ case VKD3DSIH_EXP:
+ case VKD3DSIH_F16TOF32:
+ case VKD3DSIH_F32TOF16:
+ case VKD3DSIH_FRC:
+ case VKD3DSIH_FTOI:
+ case VKD3DSIH_FTOU:
+ case VKD3DSIH_GEO:
+ case VKD3DSIH_IADD:
+ case VKD3DSIH_IEQ:
+ case VKD3DSIH_IGE:
+ case VKD3DSIH_ILT:
+ case VKD3DSIH_IMAD:
+ case VKD3DSIH_IMAX:
+ case VKD3DSIH_IMIN:
+ case VKD3DSIH_IMUL:
+ case VKD3DSIH_INE:
+ case VKD3DSIH_INEG:
+ case VKD3DSIH_ISHL:
+ case VKD3DSIH_ISHR:
+ case VKD3DSIH_ITOF:
+ case VKD3DSIH_LOG:
+ case VKD3DSIH_LTO:
+ case VKD3DSIH_MAD:
+ case VKD3DSIH_MAX:
+ case VKD3DSIH_MIN:
case VKD3DSIH_MOV:
+ case VKD3DSIH_MOVC:
+ case VKD3DSIH_MUL:
+ case VKD3DSIH_NEU:
+ case VKD3DSIH_NOT:
+ case VKD3DSIH_OR:
+ case VKD3DSIH_RCP:
+ case VKD3DSIH_ROUND_NE:
+ case VKD3DSIH_ROUND_NI:
+ case VKD3DSIH_ROUND_PI:
+ case VKD3DSIH_ROUND_Z:
+ case VKD3DSIH_RSQ:
+ case VKD3DSIH_SAMPLE_INFO:
+ case VKD3DSIH_SINCOS:
+ case VKD3DSIH_SQRT:
+ case VKD3DSIH_UDIV:
+ case VKD3DSIH_UGE:
+ case VKD3DSIH_ULT:
+ case VKD3DSIH_UMAX:
+ case VKD3DSIH_UMIN:
+ case VKD3DSIH_USHR:
+ case VKD3DSIH_UTOF:
+ case VKD3DSIH_XOR:
tpf_simple_instruction(tpf, ins);
break;
@@ -6568,10 +5801,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b
case HLSL_IR_CONSTANT:
vkd3d_unreachable();
- case HLSL_IR_EXPR:
- write_sm4_expr(tpf, hlsl_ir_expr(instr));
- break;
-
case HLSL_IR_IF:
write_sm4_if(tpf, hlsl_ir_if(instr));
break;
@@ -6621,16 +5850,6 @@ static void write_sm4_block(struct tpf_compiler *tpf, const struct hlsl_block *b
static void tpf_write_shader_function(struct tpf_compiler *tpf, struct hlsl_ir_function_decl *func)
{
- struct hlsl_ctx *ctx = tpf->ctx;
- const struct hlsl_ir_var *var;
-
- LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
- {
- if ((var->is_input_semantic && var->last_read)
- || (var->is_output_semantic && var->first_write))
- tpf_write_dcl_semantic(tpf, var, func == ctx->patch_constant_func);
- }
-
if (tpf->program->shader_version.type == VKD3D_SHADER_TYPE_COMPUTE)
tpf_dcl_thread_group(tpf, &tpf->program->thread_group_size);
@@ -6648,6 +5867,7 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec
const struct hlsl_buffer *cbuffer;
struct hlsl_ctx *ctx = tpf->ctx;
size_t token_count_position;
+ uint32_t global_flags = 0;
static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
{
@@ -6669,6 +5889,27 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec
put_u32(&buffer, vkd3d_make_u32((version->major << 4) | version->minor, shader_types[version->type]));
token_count_position = put_u32(&buffer, 0);
+ if (version->major == 4)
+ {
+ for (i = 0; i < extern_resources_count; ++i)
+ {
+ const struct extern_resource *resource = &extern_resources[i];
+ const struct hlsl_type *type = resource->component_type;
+
+ if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
+ {
+ global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
+ break;
+ }
+ }
+ }
+
+ if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0))
+ global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL;
+
+ if (global_flags)
+ write_sm4_dcl_global_flags(tpf, global_flags);
+
if (version->type == VKD3D_SHADER_TYPE_HULL)
{
tpf_write_hs_decls(tpf);
@@ -6703,9 +5944,6 @@ static void tpf_write_shdr(struct tpf_compiler *tpf, struct hlsl_ir_function_dec
write_sm4_dcl_textures(tpf, resource, true);
}
- if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0))
- write_sm4_dcl_global_flags(tpf, VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL);
-
if (version->type == VKD3D_SHADER_TYPE_HULL)
tpf_write_hs_control_point_phase(tpf);
diff --git a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
index 1b6c37343d1..db18e6d12bc 100644
--- a/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
+++ b/libs/vkd3d/libs/vkd3d-shader/vkd3d_shader_private.h
@@ -1620,6 +1620,7 @@ bool sm1_usage_from_semantic_name(const char *semantic_name,
uint32_t semantic_index, enum vkd3d_decl_usage *usage, uint32_t *usage_idx);
bool sm4_register_from_semantic_name(const struct vkd3d_shader_version *version,
const char *semantic_name, bool output, enum vkd3d_shader_register_type *type, bool *has_idx);
+bool shader_sm4_is_scalar_register(const struct vkd3d_shader_register *reg);
bool sm4_sysval_semantic_from_semantic_name(enum vkd3d_shader_sysval_semantic *sysval_semantic,
const struct vkd3d_shader_version *version, bool semantic_compat_mapping, enum vkd3d_tessellator_domain domain,
const char *semantic_name, unsigned int semantic_idx, bool output, bool is_patch_constant_func);
diff --git a/libs/vkd3d/libs/vkd3d/command.c b/libs/vkd3d/libs/vkd3d/command.c
index ed4cc370639..a55a97f6f2f 100644
--- a/libs/vkd3d/libs/vkd3d/command.c
+++ b/libs/vkd3d/libs/vkd3d/command.c
@@ -4804,15 +4804,16 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi
VkDeviceSize offsets[ARRAY_SIZE(list->strides)];
const struct vkd3d_vk_device_procs *vk_procs;
VkBuffer buffers[ARRAY_SIZE(list->strides)];
+ struct d3d12_device *device = list->device;
+ unsigned int i, stride, max_view_count;
struct d3d12_resource *resource;
bool invalidate = false;
- unsigned int i, stride;
TRACE("iface %p, start_slot %u, view_count %u, views %p.\n", iface, start_slot, view_count, views);
- vk_procs = &list->device->vk_procs;
- null_resources = &list->device->null_resources;
- gpu_va_allocator = &list->device->gpu_va_allocator;
+ vk_procs = &device->vk_procs;
+ null_resources = &device->null_resources;
+ gpu_va_allocator = &device->gpu_va_allocator;
if (!vkd3d_bound_range(start_slot, view_count, ARRAY_SIZE(list->strides)))
{
@@ -4820,6 +4821,27 @@ static void STDMETHODCALLTYPE d3d12_command_list_IASetVertexBuffers(ID3D12Graphi
return;
}
+ max_view_count = device->vk_info.device_limits.maxVertexInputBindings;
+ if (start_slot < max_view_count)
+ max_view_count -= start_slot;
+ else
+ max_view_count = 0;
+
+ /* Although simply skipping unsupported binding slots isn't especially
+ * likely to work well in the general case, applications sometimes
+ * explicitly set all 32 vertex buffer bindings slots supported by
+ * Direct3D 12, with unused slots set to NULL. "Spider-Man Remastered" is
+ * an example of such an application. */
+ if (view_count > max_view_count)
+ {
+ for (i = max_view_count; i < view_count; ++i)
+ {
+ if (views && views[i].BufferLocation)
+ WARN("Ignoring unsupported vertex buffer slot %u.\n", start_slot + i);
+ }
+ view_count = max_view_count;
+ }
+
for (i = 0; i < view_count; ++i)
{
if (views && views[i].BufferLocation)
--
2.45.2