mirror of
https://gitlab.winehq.org/wine/vkd3d.git
synced 2024-11-21 16:46:41 -08:00
vkd3d-shader/ir: Implement MAD in two operations if flagged as precise.
With some changes by Giovanni Mascellani.
This commit is contained in:
parent
9112a5be58
commit
23db066922
Notes:
Alexandre Julliard
2024-04-15 22:23:02 +02:00
Approved-by: Giovanni Mascellani (@giomasce) Approved-by: Conor McCarthy (@cmccarthy) Approved-by: Henri Verbeet (@hverbeet) Approved-by: Alexandre Julliard (@julliard) Merge-Request: https://gitlab.winehq.org/wine/vkd3d/-/merge_requests/779
@ -17,6 +17,7 @@
|
||||
*/
|
||||
|
||||
#include "vkd3d_shader_private.h"
|
||||
#include "vkd3d_types.h"
|
||||
|
||||
bool vsir_program_init(struct vsir_program *program, const struct vkd3d_shader_version *version, unsigned int reserve)
|
||||
{
|
||||
@ -159,6 +160,56 @@ static enum vkd3d_result vsir_program_lower_texkill(struct vsir_program *program
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
/* The Shader Model 5 Assembly documentation states: "If components of a mad
|
||||
* instruction are tagged as precise, the hardware must execute a mad instruction
|
||||
* or the exact equivalent, and it cannot split it into a multiply followed by an add."
|
||||
* But DXIL.rst states the opposite: "Floating point multiply & add. This operation is
|
||||
* not fused for "precise" operations."
|
||||
* Windows drivers seem to conform with the latter, for SM 4-5 and SM 6. */
|
||||
static enum vkd3d_result vsir_program_lower_precise_mad(struct vsir_program *program,
|
||||
struct vkd3d_shader_instruction *mad, unsigned int *tmp_idx)
|
||||
{
|
||||
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
|
||||
struct vkd3d_shader_instruction *mul_ins, *add_ins;
|
||||
size_t pos = mad - instructions->elements;
|
||||
struct vkd3d_shader_dst_param *mul_dst;
|
||||
|
||||
if (!(mad->flags & VKD3DSI_PRECISE_XYZW))
|
||||
return VKD3D_OK;
|
||||
|
||||
if (!shader_instruction_array_insert_at(instructions, pos + 1, 1))
|
||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
if (*tmp_idx == ~0u)
|
||||
*tmp_idx = program->temp_count++;
|
||||
|
||||
mul_ins = &instructions->elements[pos];
|
||||
add_ins = &instructions->elements[pos + 1];
|
||||
|
||||
mul_ins->handler_idx = VKD3DSIH_MUL;
|
||||
mul_ins->src_count = 2;
|
||||
|
||||
if (!(vsir_instruction_init_with_params(program, add_ins, &mul_ins->location, VKD3DSIH_ADD, 1, 2)))
|
||||
return VKD3D_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
add_ins->flags = mul_ins->flags & VKD3DSI_PRECISE_XYZW;
|
||||
|
||||
mul_dst = mul_ins->dst;
|
||||
*add_ins->dst = *mul_dst;
|
||||
|
||||
mul_dst->modifiers = 0;
|
||||
vsir_register_init(&mul_dst->reg, VKD3DSPR_TEMP, mul_ins->src[0].reg.data_type, 1);
|
||||
mul_dst->reg.dimension = add_ins->dst->reg.dimension;
|
||||
mul_dst->reg.idx[0].offset = *tmp_idx;
|
||||
|
||||
add_ins->src[0].reg = mul_dst->reg;
|
||||
add_ins->src[0].swizzle = vsir_swizzle_from_writemask(mul_dst->write_mask);
|
||||
add_ins->src[0].modifiers = 0;
|
||||
add_ins->src[1] = mul_ins->src[2];
|
||||
|
||||
return VKD3D_OK;
|
||||
}
|
||||
|
||||
static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *program)
|
||||
{
|
||||
struct vkd3d_shader_instruction_array *instructions = &program->instructions;
|
||||
@ -176,6 +227,11 @@ static enum vkd3d_result vsir_program_lower_instructions(struct vsir_program *pr
|
||||
return ret;
|
||||
break;
|
||||
|
||||
case VKD3DSIH_MAD:
|
||||
if ((ret = vsir_program_lower_precise_mad(program, ins, &tmp_idx)) < 0)
|
||||
return ret;
|
||||
break;
|
||||
|
||||
case VKD3DSIH_DCL_TEMPS:
|
||||
vkd3d_shader_instruction_make_nop(ins);
|
||||
break;
|
||||
|
@ -1761,6 +1761,31 @@ static inline unsigned int vkd3d_compact_swizzle(uint32_t swizzle, uint32_t writ
|
||||
return compacted_swizzle;
|
||||
}
|
||||
|
||||
static inline uint32_t vsir_swizzle_from_writemask(unsigned int writemask)
|
||||
{
|
||||
static const unsigned int swizzles[16] =
|
||||
{
|
||||
0,
|
||||
VKD3D_SHADER_SWIZZLE(X, X, X, X),
|
||||
VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y),
|
||||
VKD3D_SHADER_SWIZZLE(X, Y, X, X),
|
||||
VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z),
|
||||
VKD3D_SHADER_SWIZZLE(X, Z, X, X),
|
||||
VKD3D_SHADER_SWIZZLE(Y, Z, X, X),
|
||||
VKD3D_SHADER_SWIZZLE(X, Y, Z, X),
|
||||
VKD3D_SHADER_SWIZZLE(W, W, W, W),
|
||||
VKD3D_SHADER_SWIZZLE(X, W, X, X),
|
||||
VKD3D_SHADER_SWIZZLE(Y, W, X, X),
|
||||
VKD3D_SHADER_SWIZZLE(X, Y, W, X),
|
||||
VKD3D_SHADER_SWIZZLE(Z, W, X, X),
|
||||
VKD3D_SHADER_SWIZZLE(X, Z, W, X),
|
||||
VKD3D_SHADER_SWIZZLE(Y, Z, W, X),
|
||||
VKD3D_SHADER_SWIZZLE(X, Y, Z, W),
|
||||
};
|
||||
|
||||
return swizzles[writemask & 0xf];
|
||||
}
|
||||
|
||||
struct vkd3d_struct
|
||||
{
|
||||
enum vkd3d_shader_structure_type type;
|
||||
|
@ -121,7 +121,7 @@ uniform 0 float4 1.00000007 -42.1 4.0 45.0
|
||||
uniform 4 float4 1.625 -5.0 4.125 5.0
|
||||
uniform 8 float4 1.00000007 -1.0 0.5 -0.5
|
||||
todo(sm<6) draw quad
|
||||
todo probe all rgba (2.62500048, 209.5, 17.0, 224.5)
|
||||
probe all rgba (2.62500048, 209.5, 17.0, 224.5)
|
||||
|
||||
[require]
|
||||
shader model >= 5.0
|
||||
|
Loading…
Reference in New Issue
Block a user