mirror of https://gitlab.winehq.org/wine/vkd3d.git
d6d9aab31c
The practical effect this has is that we avoid potential trailing padding at the end of DXBC blobs. Unfortunately this also means we need to be more careful about using bytecode_get_size() to find the offset where subsequent data would get written, although in many cases this follows a put_u32() call.
2532 lines, 85 KiB, C

/*
 * HLSL code generation for DXBC shader models 4-5
 *
 * Copyright 2019-2020 Zebediah Figura for CodeWeavers
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 */

#include "hlsl.h"
#include <stdio.h>
#include "vkd3d_d3dcommon.h"
#include "sm4.h"

static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_block *block);

static bool type_is_integer(const struct hlsl_type *type)
{
    switch (type->base_type)
    {
        case HLSL_TYPE_BOOL:
        case HLSL_TYPE_INT:
        case HLSL_TYPE_UINT:
            return true;

        default:
            return false;
    }
}

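/* Map a semantic to a dedicated SM4 register type, if it has one. Returns
 * false if the semantic should instead live in a numbered v#/o# register;
 * "has_idx" reports whether the returned register type carries a register
 * index. */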
bool hlsl_sm4_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
        bool output, enum vkd3d_sm4_register_type *type, enum vkd3d_sm4_swizzle_type *swizzle_type, bool *has_idx)
{
    unsigned int i;

    static const struct
    {
        const char *semantic;
        bool output;
        enum vkd3d_shader_type shader_type;
        enum vkd3d_sm4_swizzle_type swizzle_type;
        enum vkd3d_sm4_register_type type;
        bool has_idx;
    }
    register_table[] =
    {
        {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_ID, false},
        {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_THREAD_GROUP_ID, false},
        {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM5_RT_LOCAL_THREAD_ID, false},

        {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, VKD3D_SM4_SWIZZLE_NONE, VKD3D_SM4_RT_PRIMID, false},

        /* Put sv_target in this table, instead of letting it fall through to
         * default varying allocation, so that the register index matches the
         * usage index. */
        {"color", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true},
        {"depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false},
        {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_DEPTHOUT, false},
        {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, VKD3D_SM4_SWIZZLE_VEC4, VKD3D_SM4_RT_OUTPUT, true},
    };

    for (i = 0; i < ARRAY_SIZE(register_table); ++i)
    {
        if (!ascii_strcasecmp(semantic->name, register_table[i].semantic)
                && output == register_table[i].output
                && ctx->profile->type == register_table[i].shader_type)
        {
            *type = register_table[i].type;
            if (swizzle_type)
                *swizzle_type = register_table[i].swizzle_type;
            *has_idx = register_table[i].has_idx;
            return true;
        }
    }

    return false;
}

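/* Translate a semantic name to its D3D_NAME usage for signature elements.
 * The compute thread-id semantics map to ~0u, which callers use as a marker
 * to skip the variable, since those semantics never appear in a signature.
 * Returns false for unrecognized "sv_"-prefixed names. */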
bool hlsl_sm4_usage_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
        bool output, D3D_NAME *usage)
{
    unsigned int i;

    static const struct
    {
        const char *name;
        bool output;
        enum vkd3d_shader_type shader_type;
        D3DDECLUSAGE usage;
    }
    semantics[] =
    {
        {"sv_dispatchthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
        {"sv_groupid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},
        {"sv_groupthreadid", false, VKD3D_SHADER_TYPE_COMPUTE, ~0u},

        {"position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
        {"sv_position", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
        {"sv_primitiveid", false, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID},

        {"position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
        {"sv_position", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_POSITION},
        {"sv_primitiveid", true, VKD3D_SHADER_TYPE_GEOMETRY, D3D_NAME_PRIMITIVE_ID},

        {"position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION},
        {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_POSITION},

        {"color", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET},
        {"depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH},
        {"sv_target", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_TARGET},
        {"sv_depth", true, VKD3D_SHADER_TYPE_PIXEL, D3D_NAME_DEPTH},

        {"sv_position", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_UNDEFINED},
        {"sv_vertexid", false, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_VERTEX_ID},

        {"position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION},
        {"sv_position", true, VKD3D_SHADER_TYPE_VERTEX, D3D_NAME_POSITION},
    };

    for (i = 0; i < ARRAY_SIZE(semantics); ++i)
    {
        if (!ascii_strcasecmp(semantic->name, semantics[i].name)
                && output == semantics[i].output
                && ctx->profile->type == semantics[i].shader_type
                && !ascii_strncasecmp(semantic->name, "sv_", 3))
        {
            *usage = semantics[i].usage;
            return true;
        }
    }

    if (!ascii_strncasecmp(semantic->name, "sv_", 3))
        return false;

    *usage = D3D_NAME_UNDEFINED;
    return true;
}

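/* Each signature element occupies six dwords:
 *   [0] semantic name (string offset, patched in later)
 *   [1] usage index
 *   [2] usage (D3D_NAME)
 *   [3] component type
 *   [4] register index
 *   [5] mask and used mask, packed as two bytes.
 * The element count is patched into the first dword of the section once all
 * elements have been written. */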
static void write_sm4_signature(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc, bool output)
{
    struct vkd3d_bytecode_buffer buffer = {0};
    struct vkd3d_string_buffer *string;
    const struct hlsl_ir_var *var;
    size_t count_position;
    unsigned int i;
    bool ret;

    count_position = put_u32(&buffer, 0);
    put_u32(&buffer, 8); /* unknown */

    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        unsigned int width = (1u << var->data_type->dimx) - 1, use_mask;
        enum vkd3d_sm4_register_type type;
        uint32_t usage_idx, reg_idx;
        D3D_NAME usage;
        bool has_idx;

        if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic))
            continue;

        ret = hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
        assert(ret);
        if (usage == ~0u)
            continue;
        usage_idx = var->semantic.index;

        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &type, NULL, &has_idx))
        {
            reg_idx = has_idx ? var->semantic.index : ~0u;
        }
        else
        {
            assert(var->regs[HLSL_REGSET_NUMERIC].allocated);
            type = VKD3D_SM4_RT_INPUT;
            reg_idx = var->regs[HLSL_REGSET_NUMERIC].id;
        }

        use_mask = width; /* FIXME: accurately report use mask */
        if (output)
            use_mask = 0xf ^ use_mask;

        /* Special pixel shader semantics (TARGET, DEPTH, COVERAGE). */
        if (usage >= 64)
            usage = 0;

        put_u32(&buffer, 0); /* name */
        put_u32(&buffer, usage_idx);
        put_u32(&buffer, usage);
        switch (var->data_type->base_type)
        {
            case HLSL_TYPE_FLOAT:
            case HLSL_TYPE_HALF:
                put_u32(&buffer, D3D_REGISTER_COMPONENT_FLOAT32);
                break;

            case HLSL_TYPE_INT:
                put_u32(&buffer, D3D_REGISTER_COMPONENT_SINT32);
                break;

            case HLSL_TYPE_BOOL:
            case HLSL_TYPE_UINT:
                put_u32(&buffer, D3D_REGISTER_COMPONENT_UINT32);
                break;

            default:
                if ((string = hlsl_type_to_string(ctx, var->data_type)))
                    hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
                            "Invalid data type %s for semantic variable %s.", string->buffer, var->name);
                hlsl_release_string_buffer(ctx, string);
                put_u32(&buffer, D3D_REGISTER_COMPONENT_UNKNOWN);
        }
        put_u32(&buffer, reg_idx);
        put_u32(&buffer, vkd3d_make_u16(width, use_mask));
    }

    i = 0;
    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        const char *semantic = var->semantic.name;
        size_t string_offset;
        D3D_NAME usage;

        if ((output && !var->is_output_semantic) || (!output && !var->is_input_semantic))
            continue;

        hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
        if (usage == ~0u)
            continue;

        if (usage == D3D_NAME_TARGET && !ascii_strcasecmp(semantic, "color"))
            string_offset = put_string(&buffer, "SV_Target");
        else if (usage == D3D_NAME_DEPTH && !ascii_strcasecmp(semantic, "depth"))
            string_offset = put_string(&buffer, "SV_Depth");
        else if (usage == D3D_NAME_POSITION && !ascii_strcasecmp(semantic, "position"))
            string_offset = put_string(&buffer, "SV_Position");
        else
            string_offset = put_string(&buffer, semantic);
        set_u32(&buffer, (2 + i++ * 6) * sizeof(uint32_t), string_offset);
    }

    set_u32(&buffer, count_position, i);

    dxbc_writer_add_section(dxbc, output ? TAG_OSGN : TAG_ISGN, buffer.data, buffer.size);
}

static const struct hlsl_type *get_array_type(const struct hlsl_type *type)
{
    if (type->class == HLSL_CLASS_ARRAY)
        return get_array_type(type->e.array.type);
    return type;
}

static unsigned int get_array_size(const struct hlsl_type *type)
{
    if (type->class == HLSL_CLASS_ARRAY)
        return get_array_size(type->e.array.type) * type->e.array.elements_count;
    return 1;
}

static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
{
    switch (type->class)
    {
        case HLSL_CLASS_ARRAY:
            return sm4_class(type->e.array.type);
        case HLSL_CLASS_MATRIX:
            assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
            if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
                return D3D_SVC_MATRIX_COLUMNS;
            else
                return D3D_SVC_MATRIX_ROWS;
        case HLSL_CLASS_OBJECT:
            return D3D_SVC_OBJECT;
        case HLSL_CLASS_SCALAR:
            return D3D_SVC_SCALAR;
        case HLSL_CLASS_STRUCT:
            return D3D_SVC_STRUCT;
        case HLSL_CLASS_VECTOR:
            return D3D_SVC_VECTOR;
        default:
            ERR("Invalid class %#x.\n", type->class);
            vkd3d_unreachable();
    }
}

static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type)
{
    switch (type->base_type)
    {
        case HLSL_TYPE_BOOL:
            return D3D_SVT_BOOL;
        case HLSL_TYPE_DOUBLE:
            return D3D_SVT_DOUBLE;
        case HLSL_TYPE_FLOAT:
        case HLSL_TYPE_HALF:
            return D3D_SVT_FLOAT;
        case HLSL_TYPE_INT:
            return D3D_SVT_INT;
        case HLSL_TYPE_PIXELSHADER:
            return D3D_SVT_PIXELSHADER;
        case HLSL_TYPE_SAMPLER:
            switch (type->sampler_dim)
            {
                case HLSL_SAMPLER_DIM_1D:
                    return D3D_SVT_SAMPLER1D;
                case HLSL_SAMPLER_DIM_2D:
                    return D3D_SVT_SAMPLER2D;
                case HLSL_SAMPLER_DIM_3D:
                    return D3D_SVT_SAMPLER3D;
                case HLSL_SAMPLER_DIM_CUBE:
                    return D3D_SVT_SAMPLERCUBE;
                case HLSL_SAMPLER_DIM_GENERIC:
                    return D3D_SVT_SAMPLER;
                default:
                    vkd3d_unreachable();
            }
            break;
        case HLSL_TYPE_STRING:
            return D3D_SVT_STRING;
        case HLSL_TYPE_TEXTURE:
            switch (type->sampler_dim)
            {
                case HLSL_SAMPLER_DIM_1D:
                    return D3D_SVT_TEXTURE1D;
                case HLSL_SAMPLER_DIM_2D:
                    return D3D_SVT_TEXTURE2D;
                case HLSL_SAMPLER_DIM_3D:
                    return D3D_SVT_TEXTURE3D;
                case HLSL_SAMPLER_DIM_CUBE:
                    return D3D_SVT_TEXTURECUBE;
                case HLSL_SAMPLER_DIM_GENERIC:
                    return D3D_SVT_TEXTURE;
                default:
                    vkd3d_unreachable();
            }
            break;
        case HLSL_TYPE_UINT:
            return D3D_SVT_UINT;
        case HLSL_TYPE_VERTEXSHADER:
            return D3D_SVT_VERTEXSHADER;
        case HLSL_TYPE_VOID:
            return D3D_SVT_VOID;
        default:
            vkd3d_unreachable();
    }
}

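/* Write the RDEF type description for a variable's type, recursing through
 * struct fields first so that their bytecode offsets are known. The
 * description packs (class, base type), (row count, column count),
 * (array size, field count), and the field array offset into four dwords;
 * SM5 profiles append four dwords whose meaning is not yet known and an
 * offset to the type's name. */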
static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type)
{
    const struct hlsl_type *array_type = get_array_type(type);
    const char *name = array_type->name ? array_type->name : "<unnamed>";
    const struct hlsl_profile_info *profile = ctx->profile;
    unsigned int field_count = 0, array_size = 0;
    size_t fields_offset = 0, name_offset = 0;
    size_t i;

    if (type->bytecode_offset)
        return;

    if (profile->major_version >= 5)
        name_offset = put_string(buffer, name);

    if (type->class == HLSL_CLASS_ARRAY)
        array_size = get_array_size(type);

    if (array_type->class == HLSL_CLASS_STRUCT)
    {
        field_count = array_type->e.record.field_count;

        for (i = 0; i < field_count; ++i)
        {
            struct hlsl_struct_field *field = &array_type->e.record.fields[i];

            field->name_bytecode_offset = put_string(buffer, field->name);
            write_sm4_type(ctx, buffer, field->type);
        }

        fields_offset = bytecode_get_next_offset(buffer);

        for (i = 0; i < field_count; ++i)
        {
            struct hlsl_struct_field *field = &array_type->e.record.fields[i];

            put_u32(buffer, field->name_bytecode_offset);
            put_u32(buffer, field->type->bytecode_offset);
            put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC]);
        }
    }

    type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(type), sm4_base_type(type)));
    put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx));
    put_u32(buffer, vkd3d_make_u32(array_size, field_count));
    put_u32(buffer, fields_offset);

    if (profile->major_version >= 5)
    {
        put_u32(buffer, 0); /* FIXME: unknown */
        put_u32(buffer, 0); /* FIXME: unknown */
        put_u32(buffer, 0); /* FIXME: unknown */
        put_u32(buffer, 0); /* FIXME: unknown */
        put_u32(buffer, name_offset);
    }
}

static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
{
    switch (type->base_type)
    {
        case HLSL_TYPE_SAMPLER:
            return D3D_SIT_SAMPLER;
        case HLSL_TYPE_TEXTURE:
            return D3D_SIT_TEXTURE;
        case HLSL_TYPE_UAV:
            return D3D_SIT_UAV_RWTYPED;
        default:
            vkd3d_unreachable();
    }
}

static D3D_RESOURCE_RETURN_TYPE sm4_resource_format(const struct hlsl_type *type)
{
    switch (type->e.resource_format->base_type)
    {
        case HLSL_TYPE_DOUBLE:
            return D3D_RETURN_TYPE_DOUBLE;

        case HLSL_TYPE_FLOAT:
        case HLSL_TYPE_HALF:
            return D3D_RETURN_TYPE_FLOAT;

        case HLSL_TYPE_INT:
            return D3D_RETURN_TYPE_SINT;

        case HLSL_TYPE_BOOL:
        case HLSL_TYPE_UINT:
            return D3D_RETURN_TYPE_UINT;

        default:
            vkd3d_unreachable();
    }
}

static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type)
{
    switch (type->sampler_dim)
    {
        case HLSL_SAMPLER_DIM_1D:
            return D3D_SRV_DIMENSION_TEXTURE1D;
        case HLSL_SAMPLER_DIM_2D:
            return D3D_SRV_DIMENSION_TEXTURE2D;
        case HLSL_SAMPLER_DIM_3D:
            return D3D_SRV_DIMENSION_TEXTURE3D;
        case HLSL_SAMPLER_DIM_CUBE:
            return D3D_SRV_DIMENSION_TEXTURECUBE;
        case HLSL_SAMPLER_DIM_1DARRAY:
            return D3D_SRV_DIMENSION_TEXTURE1DARRAY;
        case HLSL_SAMPLER_DIM_2DARRAY:
            return D3D_SRV_DIMENSION_TEXTURE2DARRAY;
        case HLSL_SAMPLER_DIM_2DMS:
            return D3D_SRV_DIMENSION_TEXTURE2DMS;
        case HLSL_SAMPLER_DIM_2DMSARRAY:
            return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY;
        case HLSL_SAMPLER_DIM_CUBEARRAY:
            return D3D_SRV_DIMENSION_TEXTURECUBEARRAY;
        default:
            vkd3d_unreachable();
    }
}

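/* Gather all allocated extern resource variables, sorted by register set and
 * then by register index, so that RDEF binding entries are emitted in a
 * deterministic order. */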
static int sm4_compare_extern_resources(const void *a, const void *b)
{
    const struct hlsl_ir_var *aa = *(const struct hlsl_ir_var **)a;
    const struct hlsl_ir_var *bb = *(const struct hlsl_ir_var **)b;
    enum hlsl_regset aa_regset, bb_regset;

    aa_regset = hlsl_type_get_regset(aa->data_type);
    bb_regset = hlsl_type_get_regset(bb->data_type);

    if (aa_regset != bb_regset)
        return aa_regset - bb_regset;

    return aa->regs[aa_regset].id - bb->regs[bb_regset].id;
}

static const struct hlsl_ir_var **sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count)
{
    const struct hlsl_ir_var **extern_resources = NULL;
    const struct hlsl_ir_var *var;
    enum hlsl_regset regset;
    size_t capacity = 0;

    *count = 0;

    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        if (!hlsl_type_is_resource(var->data_type))
            continue;
        regset = hlsl_type_get_regset(var->data_type);
        if (!var->regs[regset].allocated)
            continue;

        if (!(hlsl_array_reserve(ctx, (void **)&extern_resources, &capacity, *count + 1,
                sizeof(*extern_resources))))
        {
            *count = 0;
            return NULL;
        }

        extern_resources[*count] = var;
        ++*count;
    }

    qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources);
    return extern_resources;
}

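/* Write the RDEF (resource definition) section: a header, the bound resource
 * table, and per-cbuffer variable and type descriptions, with the name
 * strings appended afterwards and their offsets patched in. */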
static void write_sm4_rdef(struct hlsl_ctx *ctx, struct dxbc_writer *dxbc)
{
    unsigned int cbuffer_count = 0, resource_count = 0, extern_resources_count, i, j;
    size_t cbuffers_offset, resources_offset, creator_offset, string_offset;
    size_t cbuffer_position, resource_position, creator_position;
    const struct hlsl_profile_info *profile = ctx->profile;
    const struct hlsl_ir_var **extern_resources;
    struct vkd3d_bytecode_buffer buffer = {0};
    const struct hlsl_buffer *cbuffer;
    const struct hlsl_ir_var *var;

    static const uint16_t target_types[] =
    {
        0xffff, /* PIXEL */
        0xfffe, /* VERTEX */
        0x4753, /* GEOMETRY */
        0x4853, /* HULL */
        0x4453, /* DOMAIN */
        0x4353, /* COMPUTE */
    };

    extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);

    resource_count += extern_resources_count;
    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
    {
        if (cbuffer->reg.allocated)
        {
            ++cbuffer_count;
            ++resource_count;
        }
    }

    put_u32(&buffer, cbuffer_count);
    cbuffer_position = put_u32(&buffer, 0);
    put_u32(&buffer, resource_count);
    resource_position = put_u32(&buffer, 0);
    put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version),
            target_types[profile->type]));
    put_u32(&buffer, 0); /* FIXME: compilation flags */
    creator_position = put_u32(&buffer, 0);

    if (profile->major_version >= 5)
    {
        put_u32(&buffer, TAG_RD11);
        put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */
        put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */
        put_u32(&buffer, 8 * sizeof(uint32_t)); /* size of binding desc */
        put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */
        put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */
        put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */
        put_u32(&buffer, 0); /* unknown; possibly a null terminator */
    }

    /* Bound resources. */

    resources_offset = bytecode_get_next_offset(&buffer);
    set_u32(&buffer, resource_position, resources_offset);

    for (i = 0; i < extern_resources_count; ++i)
    {
        enum hlsl_regset regset;
        uint32_t flags = 0;

        var = extern_resources[i];
        regset = hlsl_type_get_regset(var->data_type);

        if (var->reg_reservation.reg_type)
            flags |= D3D_SIF_USERPACKED;

        put_u32(&buffer, 0); /* name */
        put_u32(&buffer, sm4_resource_type(var->data_type));
        if (regset == HLSL_REGSET_SAMPLERS)
        {
            put_u32(&buffer, 0);
            put_u32(&buffer, 0);
            put_u32(&buffer, 0);
        }
        else
        {
            put_u32(&buffer, sm4_resource_format(var->data_type));
            put_u32(&buffer, sm4_rdef_resource_dimension(var->data_type));
            put_u32(&buffer, ~0u); /* FIXME: multisample count */
            flags |= (var->data_type->e.resource_format->dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT;
        }
        put_u32(&buffer, var->regs[regset].id);
        put_u32(&buffer, 1); /* bind count */
        put_u32(&buffer, flags);
    }

    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
    {
        uint32_t flags = 0;

        if (!cbuffer->reg.allocated)
            continue;

        if (cbuffer->reservation.reg_type)
            flags |= D3D_SIF_USERPACKED;

        put_u32(&buffer, 0); /* name */
        put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER);
        put_u32(&buffer, 0); /* return type */
        put_u32(&buffer, 0); /* dimension */
        put_u32(&buffer, 0); /* multisample count */
        put_u32(&buffer, cbuffer->reg.id); /* bind point */
        put_u32(&buffer, 1); /* bind count */
        put_u32(&buffer, flags); /* flags */
    }

    for (i = 0; i < extern_resources_count; ++i)
    {
        var = extern_resources[i];

        string_offset = put_string(&buffer, var->name);
        set_u32(&buffer, resources_offset + i * 8 * sizeof(uint32_t), string_offset);
    }

    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
    {
        if (!cbuffer->reg.allocated)
            continue;

        string_offset = put_string(&buffer, cbuffer->name);
        set_u32(&buffer, resources_offset + i++ * 8 * sizeof(uint32_t), string_offset);
    }

    /* Buffers. */

    cbuffers_offset = bytecode_get_next_offset(&buffer);
    set_u32(&buffer, cbuffer_position, cbuffers_offset);
    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
    {
        unsigned int var_count = 0;

        if (!cbuffer->reg.allocated)
            continue;

        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
        {
            if (var->is_uniform && var->buffer == cbuffer)
                ++var_count;
        }

        put_u32(&buffer, 0); /* name */
        put_u32(&buffer, var_count);
        put_u32(&buffer, 0); /* variable offset */
        put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float));
        put_u32(&buffer, 0); /* FIXME: flags */
        put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER);
    }

    i = 0;
    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
    {
        if (!cbuffer->reg.allocated)
            continue;

        string_offset = put_string(&buffer, cbuffer->name);
        set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset);
    }

    i = 0;
    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
    {
        size_t vars_start = bytecode_get_next_offset(&buffer);

        if (!cbuffer->reg.allocated)
            continue;

        set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start);

        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
        {
            if (var->is_uniform && var->buffer == cbuffer)
            {
                uint32_t flags = 0;

                if (var->last_read)
                    flags |= D3D_SVF_USED;

                put_u32(&buffer, 0); /* name */
                put_u32(&buffer, var->buffer_offset * sizeof(float));
                put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float));
                put_u32(&buffer, flags);
                put_u32(&buffer, 0); /* type */
                put_u32(&buffer, 0); /* FIXME: default value */

                if (profile->major_version >= 5)
                {
                    put_u32(&buffer, 0); /* texture start */
                    put_u32(&buffer, 0); /* texture count */
                    put_u32(&buffer, 0); /* sampler start */
                    put_u32(&buffer, 0); /* sampler count */
                }
            }
        }

        j = 0;
        LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
        {
            if (var->is_uniform && var->buffer == cbuffer)
            {
                const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6);
                size_t var_offset = vars_start + j * var_size * sizeof(uint32_t);
                size_t string_offset = put_string(&buffer, var->name);

                set_u32(&buffer, var_offset, string_offset);
                write_sm4_type(ctx, &buffer, var->data_type);
                set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset);
                ++j;
            }
        }
    }

    creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL));
    set_u32(&buffer, creator_position, creator_offset);

    dxbc_writer_add_section(dxbc, TAG_RDEF, buffer.data, buffer.size);

    vkd3d_free(extern_resources);
}

static enum vkd3d_sm4_resource_type sm4_resource_dimension(const struct hlsl_type *type)
{
    switch (type->sampler_dim)
    {
        case HLSL_SAMPLER_DIM_1D:
            return VKD3D_SM4_RESOURCE_TEXTURE_1D;
        case HLSL_SAMPLER_DIM_2D:
            return VKD3D_SM4_RESOURCE_TEXTURE_2D;
        case HLSL_SAMPLER_DIM_3D:
            return VKD3D_SM4_RESOURCE_TEXTURE_3D;
        case HLSL_SAMPLER_DIM_CUBE:
            return VKD3D_SM4_RESOURCE_TEXTURE_CUBE;
        case HLSL_SAMPLER_DIM_1DARRAY:
            return VKD3D_SM4_RESOURCE_TEXTURE_1DARRAY;
        case HLSL_SAMPLER_DIM_2DARRAY:
            return VKD3D_SM4_RESOURCE_TEXTURE_2DARRAY;
        case HLSL_SAMPLER_DIM_2DMS:
            return VKD3D_SM4_RESOURCE_TEXTURE_2DMS;
        case HLSL_SAMPLER_DIM_2DMSARRAY:
            return VKD3D_SM4_RESOURCE_TEXTURE_2DMSARRAY;
        case HLSL_SAMPLER_DIM_CUBEARRAY:
            return VKD3D_SM4_RESOURCE_TEXTURE_CUBEARRAY;
        default:
            vkd3d_unreachable();
    }
}

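/* Instruction modifiers; currently only the aoffimmi (immediate texel
 * offset) modifier is used, which packs three signed 4-bit offsets into the
 * modifier token. */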
struct sm4_instruction_modifier
{
    enum vkd3d_sm4_instruction_modifier type;

    union
    {
        struct
        {
            int u, v, w;
        } aoffimmi;
    } u;
};

static uint32_t sm4_encode_instruction_modifier(const struct sm4_instruction_modifier *imod)
{
    uint32_t word = 0;

    word |= VKD3D_SM4_MODIFIER_MASK & imod->type;

    switch (imod->type)
    {
        case VKD3D_SM4_MODIFIER_AOFFIMMI:
            assert(-8 <= imod->u.aoffimmi.u && imod->u.aoffimmi.u <= 7);
            assert(-8 <= imod->u.aoffimmi.v && imod->u.aoffimmi.v <= 7);
            assert(-8 <= imod->u.aoffimmi.w && imod->u.aoffimmi.w <= 7);
            word |= ((uint32_t)imod->u.aoffimmi.u & 0xf) << VKD3D_SM4_AOFFIMMI_U_SHIFT;
            word |= ((uint32_t)imod->u.aoffimmi.v & 0xf) << VKD3D_SM4_AOFFIMMI_V_SHIFT;
            word |= ((uint32_t)imod->u.aoffimmi.w & 0xf) << VKD3D_SM4_AOFFIMMI_W_SHIFT;
            break;

        default:
            vkd3d_unreachable();
    }

    return word;
}

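/* An intermediate, flattened representation of an SM4 instruction, filled in
 * from the HLSL IR and encoded into bytecode tokens by
 * write_sm4_instruction(). */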
struct sm4_register
{
    enum vkd3d_sm4_register_type type;
    uint32_t idx[2];
    unsigned int idx_count;
    enum vkd3d_sm4_dimension dim;
    uint32_t immconst_uint[4];
    unsigned int mod;
};

struct sm4_instruction
{
    enum vkd3d_sm4_opcode opcode;

    struct sm4_instruction_modifier modifiers[1];
    unsigned int modifier_count;

    struct sm4_dst_register
    {
        struct sm4_register reg;
        unsigned int writemask;
    } dsts[2];
    unsigned int dst_count;

    struct sm4_src_register
    {
        struct sm4_register reg;
        enum vkd3d_sm4_swizzle_type swizzle_type;
        unsigned int swizzle;
    } srcs[4];
    unsigned int src_count;

    uint32_t idx[3];
    unsigned int idx_count;
};

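/* Choose the register file for a variable dereference: uniform resources map
 * to t#/u#/s# registers, other uniforms to a constant buffer slot, semantic
 * variables to their dedicated or numbered I/O registers, and everything
 * else to the temporary registers allocated for the variable. */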
static void sm4_register_from_deref(struct hlsl_ctx *ctx, struct sm4_register *reg,
        unsigned int *writemask, enum vkd3d_sm4_swizzle_type *swizzle_type,
        const struct hlsl_deref *deref, const struct hlsl_type *data_type)
{
    const struct hlsl_ir_var *var = deref->var;

    if (var->is_uniform)
    {
        if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_TEXTURE)
        {
            reg->type = VKD3D_SM4_RT_RESOURCE;
            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
            if (swizzle_type)
                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
            reg->idx[0] = var->regs[HLSL_REGSET_TEXTURES].id;
            reg->idx_count = 1;
            *writemask = VKD3DSP_WRITEMASK_ALL;
        }
        else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_UAV)
        {
            reg->type = VKD3D_SM5_RT_UAV;
            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
            if (swizzle_type)
                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
            reg->idx[0] = var->regs[HLSL_REGSET_UAVS].id;
            reg->idx_count = 1;
            *writemask = VKD3DSP_WRITEMASK_ALL;
        }
        else if (data_type->class == HLSL_CLASS_OBJECT && data_type->base_type == HLSL_TYPE_SAMPLER)
        {
            reg->type = VKD3D_SM4_RT_SAMPLER;
            reg->dim = VKD3D_SM4_DIMENSION_NONE;
            if (swizzle_type)
                *swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
            reg->idx[0] = var->regs[HLSL_REGSET_SAMPLERS].id;
            reg->idx_count = 1;
            *writemask = VKD3DSP_WRITEMASK_ALL;
        }
        else
        {
            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref) + var->buffer_offset;

            assert(data_type->class <= HLSL_CLASS_VECTOR);
            reg->type = VKD3D_SM4_RT_CONSTBUFFER;
            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
            if (swizzle_type)
                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
            reg->idx[0] = var->buffer->reg.id;
            reg->idx[1] = offset / 4;
            reg->idx_count = 2;
            *writemask = ((1u << data_type->dimx) - 1) << (offset & 3);
        }
    }
    else if (var->is_input_semantic)
    {
        bool has_idx;

        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, false, &reg->type, swizzle_type, &has_idx))
        {
            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);

            if (has_idx)
            {
                reg->idx[0] = var->semantic.index + offset / 4;
                reg->idx_count = 1;
            }

            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
        }
        else
        {
            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);

            assert(hlsl_reg.allocated);
            reg->type = VKD3D_SM4_RT_INPUT;
            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
            if (swizzle_type)
                *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
            reg->idx[0] = hlsl_reg.id;
            reg->idx_count = 1;
            *writemask = hlsl_reg.writemask;
        }
    }
    else if (var->is_output_semantic)
    {
        bool has_idx;

        if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, true, &reg->type, swizzle_type, &has_idx))
        {
            unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);

            if (has_idx)
            {
                reg->idx[0] = var->semantic.index + offset / 4;
                reg->idx_count = 1;
            }

            if (reg->type == VKD3D_SM4_RT_DEPTHOUT)
                reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
            else
                reg->dim = VKD3D_SM4_DIMENSION_VEC4;
            *writemask = ((1u << data_type->dimx) - 1) << (offset % 4);
        }
        else
        {
            struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);

            assert(hlsl_reg.allocated);
            reg->type = VKD3D_SM4_RT_OUTPUT;
            reg->dim = VKD3D_SM4_DIMENSION_VEC4;
            reg->idx[0] = hlsl_reg.id;
            reg->idx_count = 1;
            *writemask = hlsl_reg.writemask;
        }
    }
    else
    {
        struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);

        assert(hlsl_reg.allocated);
        reg->type = VKD3D_SM4_RT_TEMP;
        reg->dim = VKD3D_SM4_DIMENSION_VEC4;
        if (swizzle_type)
            *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
        reg->idx[0] = hlsl_reg.id;
        reg->idx_count = 1;
        *writemask = hlsl_reg.writemask;
    }
}

static void sm4_src_from_deref(struct hlsl_ctx *ctx, struct sm4_src_register *src,
        const struct hlsl_deref *deref, const struct hlsl_type *data_type, unsigned int map_writemask)
{
    unsigned int writemask;

    sm4_register_from_deref(ctx, &src->reg, &writemask, &src->swizzle_type, deref, data_type);
    if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4)
        src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
}

static void sm4_register_from_node(struct sm4_register *reg, unsigned int *writemask,
        enum vkd3d_sm4_swizzle_type *swizzle_type, const struct hlsl_ir_node *instr)
{
    assert(instr->reg.allocated);
    reg->type = VKD3D_SM4_RT_TEMP;
    reg->dim = VKD3D_SM4_DIMENSION_VEC4;
    *swizzle_type = VKD3D_SM4_SWIZZLE_VEC4;
    reg->idx[0] = instr->reg.id;
    reg->idx_count = 1;
    *writemask = instr->reg.writemask;
}

static void sm4_dst_from_node(struct sm4_dst_register *dst, const struct hlsl_ir_node *instr)
{
    enum vkd3d_sm4_swizzle_type swizzle_type;

    sm4_register_from_node(&dst->reg, &dst->writemask, &swizzle_type, instr);
}

static void sm4_src_from_node(struct sm4_src_register *src,
        const struct hlsl_ir_node *instr, unsigned int map_writemask)
{
    unsigned int writemask;

    sm4_register_from_node(&src->reg, &writemask, &src->swizzle_type, instr);
    if (src->swizzle_type == VKD3D_SM4_SWIZZLE_VEC4)
        src->swizzle = hlsl_map_swizzle(hlsl_swizzle_from_writemask(writemask), map_writemask);
}

static uint32_t sm4_encode_register(const struct sm4_register *reg)
{
    return (reg->type << VKD3D_SM4_REGISTER_TYPE_SHIFT)
            | (reg->idx_count << VKD3D_SM4_REGISTER_ORDER_SHIFT)
            | (reg->dim << VKD3D_SM4_DIMENSION_SHIFT);
}

static uint32_t sm4_register_order(const struct sm4_register *reg)
{
    uint32_t order = 1;

    if (reg->type == VKD3D_SM4_RT_IMMCONST)
        order += reg->dim == VKD3D_SM4_DIMENSION_VEC4 ? 4 : 1;
    order += reg->idx_count;
    if (reg->mod)
        ++order;
    return order;
}

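/* Encode and emit a single instruction. The layout, in dwords, is:
 *   [0]     opcode token, carrying the total instruction length and, if
 *           modifiers are present, the VKD3D_SM4_INSTRUCTION_MODIFIER
 *           continuation bit;
 *   [1..m]  modifier tokens, each setting the continuation bit if another
 *           modifier follows;
 *   ...     destination operand tokens, each followed by its register
 *           indices;
 *   ...     source operand tokens, each followed by an optional extended
 *           operand token, register indices, and immediate constants;
 *   ...     trailing literal operands (instr->idx). */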
static void write_sm4_instruction(struct vkd3d_bytecode_buffer *buffer, const struct sm4_instruction *instr)
{
    uint32_t token = instr->opcode;
    unsigned int size = 1, i, j;

    size += instr->modifier_count;
    for (i = 0; i < instr->dst_count; ++i)
        size += sm4_register_order(&instr->dsts[i].reg);
    for (i = 0; i < instr->src_count; ++i)
        size += sm4_register_order(&instr->srcs[i].reg);
    size += instr->idx_count;

    token |= (size << VKD3D_SM4_INSTRUCTION_LENGTH_SHIFT);

    if (instr->modifier_count > 0)
        token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
    put_u32(buffer, token);

    for (i = 0; i < instr->modifier_count; ++i)
    {
        token = sm4_encode_instruction_modifier(&instr->modifiers[i]);
        if (instr->modifier_count > i + 1)
            token |= VKD3D_SM4_INSTRUCTION_MODIFIER;
        put_u32(buffer, token);
    }

    for (i = 0; i < instr->dst_count; ++i)
    {
        token = sm4_encode_register(&instr->dsts[i].reg);
        if (instr->dsts[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4)
            token |= instr->dsts[i].writemask << VKD3D_SM4_WRITEMASK_SHIFT;
        put_u32(buffer, token);

        for (j = 0; j < instr->dsts[i].reg.idx_count; ++j)
            put_u32(buffer, instr->dsts[i].reg.idx[j]);
    }

    for (i = 0; i < instr->src_count; ++i)
    {
        token = sm4_encode_register(&instr->srcs[i].reg);
        token |= (uint32_t)instr->srcs[i].swizzle_type << VKD3D_SM4_SWIZZLE_TYPE_SHIFT;
        token |= instr->srcs[i].swizzle << VKD3D_SM4_SWIZZLE_SHIFT;
        if (instr->srcs[i].reg.mod)
            token |= VKD3D_SM4_EXTENDED_OPERAND;
        put_u32(buffer, token);

        if (instr->srcs[i].reg.mod)
            put_u32(buffer, (instr->srcs[i].reg.mod << VKD3D_SM4_REGISTER_MODIFIER_SHIFT)
                    | VKD3D_SM4_EXTENDED_OPERAND_MODIFIER);

        for (j = 0; j < instr->srcs[i].reg.idx_count; ++j)
            put_u32(buffer, instr->srcs[i].reg.idx[j]);

        if (instr->srcs[i].reg.type == VKD3D_SM4_RT_IMMCONST)
        {
            put_u32(buffer, instr->srcs[i].reg.immconst_uint[0]);
            if (instr->srcs[i].reg.dim == VKD3D_SM4_DIMENSION_VEC4)
            {
                put_u32(buffer, instr->srcs[i].reg.immconst_uint[1]);
                put_u32(buffer, instr->srcs[i].reg.immconst_uint[2]);
                put_u32(buffer, instr->srcs[i].reg.immconst_uint[3]);
            }
        }
    }

    for (j = 0; j < instr->idx_count; ++j)
        put_u32(buffer, instr->idx[j]);
}

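/* A texel offset can only be encoded as an aoffimmi modifier if it is a
 * compile-time constant in the range [-8, 7]; when this fails, callers
 * report an error. */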
static bool encode_texel_offset_as_aoffimmi(struct sm4_instruction *instr,
        const struct hlsl_ir_node *texel_offset)
{
    struct sm4_instruction_modifier modif;
    struct hlsl_ir_constant *offset;

    if (!texel_offset || texel_offset->type != HLSL_IR_CONSTANT)
        return false;
    offset = hlsl_ir_constant(texel_offset);

    modif.type = VKD3D_SM4_MODIFIER_AOFFIMMI;
    modif.u.aoffimmi.u = offset->value[0].i;
    modif.u.aoffimmi.v = offset->value[1].i;
    modif.u.aoffimmi.w = offset->value[2].i;
    if (modif.u.aoffimmi.u < -8 || modif.u.aoffimmi.u > 7
            || modif.u.aoffimmi.v < -8 || modif.u.aoffimmi.v > 7
            || modif.u.aoffimmi.w < -8 || modif.u.aoffimmi.w > 7)
        return false;

    instr->modifiers[instr->modifier_count++] = modif;
    return true;
}

static void write_sm4_dcl_constant_buffer(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_buffer *cbuffer)
{
    const struct sm4_instruction instr =
    {
        .opcode = VKD3D_SM4_OP_DCL_CONSTANT_BUFFER,

        .srcs[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4,
        .srcs[0].reg.type = VKD3D_SM4_RT_CONSTBUFFER,
        .srcs[0].reg.idx = {cbuffer->reg.id, (cbuffer->used_size + 3) / 4},
        .srcs[0].reg.idx_count = 2,
        .srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_VEC4,
        .srcs[0].swizzle = HLSL_SWIZZLE(X, Y, Z, W),
        .src_count = 1,
    };
    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_dcl_sampler(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var)
{
    const struct sm4_instruction instr =
    {
        .opcode = VKD3D_SM4_OP_DCL_SAMPLER,

        .dsts[0].reg.type = VKD3D_SM4_RT_SAMPLER,
        .dsts[0].reg.idx = {var->regs[HLSL_REGSET_SAMPLERS].id},
        .dsts[0].reg.idx_count = 1,
        .dst_count = 1,
    };
    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_dcl_texture(struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var)
{
    bool uav = (var->data_type->base_type == HLSL_TYPE_UAV);
    struct sm4_instruction instr =
    {
        .opcode = (uav ? VKD3D_SM5_OP_DCL_UAV_TYPED : VKD3D_SM4_OP_DCL_RESOURCE)
                | (sm4_resource_dimension(var->data_type) << VKD3D_SM4_RESOURCE_TYPE_SHIFT),

        .dsts[0].reg.type = uav ? VKD3D_SM5_RT_UAV : VKD3D_SM4_RT_RESOURCE,
        .dsts[0].reg.idx = {uav ? var->regs[HLSL_REGSET_UAVS].id : var->regs[HLSL_REGSET_TEXTURES].id},
        .dsts[0].reg.idx_count = 1,
        .dst_count = 1,

        .idx[0] = sm4_resource_format(var->data_type) * 0x1111,
        .idx_count = 1,
    };

    if (var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
            || var->data_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY)
    {
        instr.opcode |= var->data_type->sample_count << VKD3D_SM4_RESOURCE_SAMPLE_COUNT_SHIFT;
    }

    write_sm4_instruction(buffer, &instr);
}

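/* Declare an input or output semantic register. The opcode depends on the
 * shader stage and on whether the semantic is a system value: pixel shader
 * inputs use the _PS opcode variants and encode an interpolation mode, with
 * integer and "nointerpolation" inputs declared as constant-interpolated. */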
static void write_sm4_dcl_semantic(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_var *var)
{
    const struct hlsl_profile_info *profile = ctx->profile;
    const bool output = var->is_output_semantic;
    D3D_NAME usage;
    bool has_idx;

    struct sm4_instruction instr =
    {
        .dsts[0].reg.dim = VKD3D_SM4_DIMENSION_VEC4,
        .dst_count = 1,
    };

    if (hlsl_sm4_register_from_semantic(ctx, &var->semantic, output, &instr.dsts[0].reg.type, NULL, &has_idx))
    {
        if (has_idx)
        {
            instr.dsts[0].reg.idx[0] = var->semantic.index;
            instr.dsts[0].reg.idx_count = 1;
        }
        else
        {
            instr.dsts[0].reg.idx_count = 0;
        }
        instr.dsts[0].writemask = (1 << var->data_type->dimx) - 1;
    }
    else
    {
        instr.dsts[0].reg.type = output ? VKD3D_SM4_RT_OUTPUT : VKD3D_SM4_RT_INPUT;
        instr.dsts[0].reg.idx[0] = var->regs[HLSL_REGSET_NUMERIC].id;
        instr.dsts[0].reg.idx_count = 1;
        instr.dsts[0].writemask = var->regs[HLSL_REGSET_NUMERIC].writemask;
    }

    if (instr.dsts[0].reg.type == VKD3D_SM4_RT_DEPTHOUT)
        instr.dsts[0].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;

    hlsl_sm4_usage_from_semantic(ctx, &var->semantic, output, &usage);
    if (usage == ~0u)
        usage = D3D_NAME_UNDEFINED;

    if (var->is_input_semantic)
    {
        switch (usage)
        {
            case D3D_NAME_UNDEFINED:
                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
                        ? VKD3D_SM4_OP_DCL_INPUT_PS : VKD3D_SM4_OP_DCL_INPUT;
                break;

            case D3D_NAME_INSTANCE_ID:
            case D3D_NAME_PRIMITIVE_ID:
            case D3D_NAME_VERTEX_ID:
                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
                        ? VKD3D_SM4_OP_DCL_INPUT_PS_SGV : VKD3D_SM4_OP_DCL_INPUT_SGV;
                break;

            default:
                instr.opcode = (profile->type == VKD3D_SHADER_TYPE_PIXEL)
                        ? VKD3D_SM4_OP_DCL_INPUT_PS_SIV : VKD3D_SM4_OP_DCL_INPUT_SIV;
                break;
        }

        if (profile->type == VKD3D_SHADER_TYPE_PIXEL)
        {
            enum vkd3d_shader_interpolation_mode mode = VKD3DSIM_LINEAR;

            if ((var->storage_modifiers & HLSL_STORAGE_NOINTERPOLATION) || type_is_integer(var->data_type))
                mode = VKD3DSIM_CONSTANT;

            instr.opcode |= mode << VKD3D_SM4_INTERPOLATION_MODE_SHIFT;
        }
    }
    else
    {
        if (usage == D3D_NAME_UNDEFINED || profile->type == VKD3D_SHADER_TYPE_PIXEL)
            instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT;
        else
            instr.opcode = VKD3D_SM4_OP_DCL_OUTPUT_SIV;
    }

    switch (usage)
    {
        case D3D_NAME_COVERAGE:
        case D3D_NAME_DEPTH:
        case D3D_NAME_DEPTH_GREATER_EQUAL:
        case D3D_NAME_DEPTH_LESS_EQUAL:
        case D3D_NAME_TARGET:
        case D3D_NAME_UNDEFINED:
            break;

        default:
            instr.idx_count = 1;
            instr.idx[0] = usage;
            break;
    }

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_dcl_temps(struct vkd3d_bytecode_buffer *buffer, uint32_t temp_count)
{
    struct sm4_instruction instr =
    {
        .opcode = VKD3D_SM4_OP_DCL_TEMPS,

        .idx = {temp_count},
        .idx_count = 1,
    };

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_dcl_thread_group(struct vkd3d_bytecode_buffer *buffer, const uint32_t thread_count[3])
{
    struct sm4_instruction instr =
    {
        .opcode = VKD3D_SM5_OP_DCL_THREAD_GROUP,

        .idx = {thread_count[0], thread_count[1], thread_count[2]},
        .idx_count = 3,
    };

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_ret(struct vkd3d_bytecode_buffer *buffer)
{
    struct sm4_instruction instr =
    {
        .opcode = VKD3D_SM4_OP_RET,
    };

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_unary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode,
        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src, unsigned int src_mod)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = opcode;

    sm4_dst_from_node(&instr.dsts[0], dst);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], src, instr.dsts[0].writemask);
    instr.srcs[0].reg.mod = src_mod;
    instr.src_count = 1;

    write_sm4_instruction(buffer, &instr);
}

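/* Some instructions (e.g. sincos, udiv) write two destinations. When only
 * one of the results is needed, the unused destination is encoded as a null
 * register. */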
static void write_sm4_unary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer,
        enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx,
        const struct hlsl_ir_node *src)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = opcode;

    assert(dst_idx < ARRAY_SIZE(instr.dsts));
    sm4_dst_from_node(&instr.dsts[dst_idx], dst);
    assert(1 - dst_idx >= 0);
    instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL;
    instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE;
    instr.dsts[1 - dst_idx].reg.idx_count = 0;
    instr.dst_count = 2;

    sm4_src_from_node(&instr.srcs[0], src, instr.dsts[dst_idx].writemask);
    instr.src_count = 1;

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_binary_op(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode,
        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = opcode;

    sm4_dst_from_node(&instr.dsts[0], dst);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[0].writemask);
    sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[0].writemask);
    instr.src_count = 2;

    write_sm4_instruction(buffer, &instr);
}

/* dp# instructions don't map the swizzle. */
static void write_sm4_binary_op_dot(struct vkd3d_bytecode_buffer *buffer, enum vkd3d_sm4_opcode opcode,
        const struct hlsl_ir_node *dst, const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = opcode;

    sm4_dst_from_node(&instr.dsts[0], dst);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], src1, VKD3DSP_WRITEMASK_ALL);
    sm4_src_from_node(&instr.srcs[1], src2, VKD3DSP_WRITEMASK_ALL);
    instr.src_count = 2;

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_binary_op_with_two_destinations(struct vkd3d_bytecode_buffer *buffer,
        enum vkd3d_sm4_opcode opcode, const struct hlsl_ir_node *dst, unsigned int dst_idx,
        const struct hlsl_ir_node *src1, const struct hlsl_ir_node *src2)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = opcode;

    assert(dst_idx < ARRAY_SIZE(instr.dsts));
    sm4_dst_from_node(&instr.dsts[dst_idx], dst);
    assert(1 - dst_idx >= 0);
    instr.dsts[1 - dst_idx].reg.type = VKD3D_SM4_RT_NULL;
    instr.dsts[1 - dst_idx].reg.dim = VKD3D_SM4_DIMENSION_NONE;
    instr.dsts[1 - dst_idx].reg.idx_count = 0;
    instr.dst_count = 2;

    sm4_src_from_node(&instr.srcs[0], src1, instr.dsts[dst_idx].writemask);
    sm4_src_from_node(&instr.srcs[1], src2, instr.dsts[dst_idx].writemask);
    instr.src_count = 2;

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_constant(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_constant *constant)
{
    const unsigned int dimx = constant->node.data_type->dimx;
    struct sm4_instruction instr;
    struct sm4_register *reg = &instr.srcs[0].reg;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = VKD3D_SM4_OP_MOV;

    sm4_dst_from_node(&instr.dsts[0], &constant->node);
    instr.dst_count = 1;

    instr.srcs[0].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
    reg->type = VKD3D_SM4_RT_IMMCONST;
    if (dimx == 1)
    {
        reg->dim = VKD3D_SM4_DIMENSION_SCALAR;
        reg->immconst_uint[0] = constant->value[0].u;
    }
    else
    {
        unsigned int i, j = 0;

        reg->dim = VKD3D_SM4_DIMENSION_VEC4;
        for (i = 0; i < 4; ++i)
        {
            if (instr.dsts[0].writemask & (1u << i))
                reg->immconst_uint[i] = constant->value[j++].u;
        }
    }
    instr.src_count = 1;

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_ld(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst,
        const struct hlsl_deref *resource, const struct hlsl_ir_node *coords,
        const struct hlsl_ir_node *texel_offset)
{
    bool uav = (resource_type->base_type == HLSL_TYPE_UAV);
    struct sm4_instruction instr;
    unsigned int dim_count;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = uav ? VKD3D_SM5_OP_LD_UAV_TYPED : VKD3D_SM4_OP_LD;

    if (texel_offset)
    {
        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
        {
            hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
                    "Offset must resolve to integer literal in the range -8 to 7.");
            return;
        }
    }

    sm4_dst_from_node(&instr.dsts[0], dst);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);

    if (!uav)
    {
        /* Mipmap level is in the last component in the IR, but needs to be in
         * the W component in the instruction. */
        dim_count = hlsl_sampler_dim_count(resource_type->sampler_dim);
        if (dim_count == 1)
            instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, X, X, Y), 4);
        if (dim_count == 2)
            instr.srcs[0].swizzle = hlsl_combine_swizzles(instr.srcs[0].swizzle, HLSL_SWIZZLE(X, Y, X, Z), 4);
    }

    sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask);

    instr.src_count = 2;

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_sample(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst,
        const struct hlsl_deref *resource, const struct hlsl_deref *sampler,
        const struct hlsl_ir_node *coords, const struct hlsl_ir_node *texel_offset)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = VKD3D_SM4_OP_SAMPLE;

    if (texel_offset)
    {
        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
        {
            hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
                    "Offset must resolve to integer literal in the range -8 to 7.");
            return;
        }
    }

    sm4_dst_from_node(&instr.dsts[0], dst);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
    sm4_src_from_deref(ctx, &instr.srcs[1], resource, resource_type, instr.dsts[0].writemask);
    sm4_src_from_deref(ctx, &instr.srcs[2], sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL);
    instr.src_count = 3;

    write_sm4_instruction(buffer, &instr);
}

static bool type_is_float(const struct hlsl_type *type)
{
    return type->base_type == HLSL_TYPE_FLOAT || type->base_type == HLSL_TYPE_HALF;
}

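/* Bools reach this point as all-ones/zero bit patterns, so a cast from bool
 * is emitted as a bitwise AND with the bit pattern of 1 (or of 1.0f for
 * float targets), as write_sm4_cast() below does. */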
static void write_sm4_cast_from_bool(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr,
        const struct hlsl_ir_node *arg, uint32_t mask)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = VKD3D_SM4_OP_AND;

    sm4_dst_from_node(&instr.dsts[0], &expr->node);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], arg, instr.dsts[0].writemask);
    instr.srcs[1].swizzle_type = VKD3D_SM4_SWIZZLE_NONE;
    instr.srcs[1].reg.type = VKD3D_SM4_RT_IMMCONST;
    instr.srcs[1].reg.dim = VKD3D_SM4_DIMENSION_SCALAR;
    instr.srcs[1].reg.immconst_uint[0] = mask;
    instr.src_count = 2;

    write_sm4_instruction(buffer, &instr);
}

static void write_sm4_cast(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr)
{
    static const union
    {
        uint32_t u;
        float f;
    } one = { .f = 1.0 };
    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
    const struct hlsl_type *dst_type = expr->node.data_type;
    const struct hlsl_type *src_type = arg1->data_type;

    /* Narrowing casts were already lowered. */
    assert(src_type->dimx == dst_type->dimx);

    switch (dst_type->base_type)
    {
        case HLSL_TYPE_FLOAT:
            switch (src_type->base_type)
            {
                case HLSL_TYPE_HALF:
                case HLSL_TYPE_FLOAT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
                    break;

                case HLSL_TYPE_INT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_ITOF, &expr->node, arg1, 0);
                    break;

                case HLSL_TYPE_UINT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_UTOF, &expr->node, arg1, 0);
                    break;

                case HLSL_TYPE_BOOL:
                    write_sm4_cast_from_bool(ctx, buffer, expr, arg1, one.u);
                    break;

                case HLSL_TYPE_DOUBLE:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
                    break;

                default:
                    vkd3d_unreachable();
            }
            break;

        case HLSL_TYPE_INT:
            switch (src_type->base_type)
            {
                case HLSL_TYPE_HALF:
                case HLSL_TYPE_FLOAT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOI, &expr->node, arg1, 0);
                    break;

                case HLSL_TYPE_INT:
                case HLSL_TYPE_UINT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
                    break;

                case HLSL_TYPE_BOOL:
                    write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1);
                    break;

                case HLSL_TYPE_DOUBLE:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int.");
                    break;

                default:
                    vkd3d_unreachable();
            }
            break;

        case HLSL_TYPE_UINT:
            switch (src_type->base_type)
            {
                case HLSL_TYPE_HALF:
                case HLSL_TYPE_FLOAT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_FTOU, &expr->node, arg1, 0);
                    break;

                case HLSL_TYPE_INT:
                case HLSL_TYPE_UINT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
                    break;

                case HLSL_TYPE_BOOL:
                    write_sm4_cast_from_bool(ctx, buffer, expr, arg1, 1);
                    break;

                case HLSL_TYPE_DOUBLE:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
                    break;

                default:
                    vkd3d_unreachable();
            }
            break;

        case HLSL_TYPE_HALF:
            hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to half.");
            break;

        case HLSL_TYPE_DOUBLE:
            hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double.");
            break;

        case HLSL_TYPE_BOOL:
            /* Casts to bool should have already been lowered. */
        default:
            vkd3d_unreachable();
    }
}

static void write_sm4_store_uav_typed(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_deref *dst, const struct hlsl_ir_node *coords, const struct hlsl_ir_node *value)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = VKD3D_SM5_OP_STORE_UAV_TYPED;

    sm4_register_from_deref(ctx, &instr.dsts[0].reg, &instr.dsts[0].writemask, NULL, dst, dst->var->data_type);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], coords, VKD3DSP_WRITEMASK_ALL);
    sm4_src_from_node(&instr.srcs[1], value, VKD3DSP_WRITEMASK_ALL);
    instr.src_count = 2;

    write_sm4_instruction(buffer, &instr);
}

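/* Translate an HLSL expression node, dispatching on the operation and the
 * operand types to the corresponding SM4 opcode. Unimplemented cases report
 * a fixme rather than emitting incorrect bytecode. */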
static void write_sm4_expr(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_expr *expr)
{
    const struct hlsl_ir_node *arg1 = expr->operands[0].node;
    const struct hlsl_ir_node *arg2 = expr->operands[1].node;
    const struct hlsl_type *dst_type = expr->node.data_type;
    struct vkd3d_string_buffer *dst_type_string;

    assert(expr->node.reg.allocated);

    if (!(dst_type_string = hlsl_type_to_string(ctx, dst_type)))
        return;

    switch (expr->op)
    {
        case HLSL_OP1_ABS:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_ABS);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s absolute value expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP1_BIT_NOT:
            assert(type_is_integer(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_CAST:
            write_sm4_cast(ctx, buffer, expr);
            break;

        case HLSL_OP1_COS:
            assert(type_is_float(dst_type));
            /* sincos writes the sine to its first destination and the cosine
             * to its second; only the destination we need is populated. */
            write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 1, arg1);
            break;

        case HLSL_OP1_EXP2:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_EXP, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_FLOOR:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NI, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_FRACT:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_FRC, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_LOG2:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_LOG, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_LOGIC_NOT:
            assert(dst_type->base_type == HLSL_TYPE_BOOL);
            /* Bools are stored as 0 or ~0u, so bitwise NOT doubles as logical NOT. */
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_NOT, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_NEG:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, VKD3D_SM4_REGISTER_MODIFIER_NEGATE);
                    break;

                case HLSL_TYPE_INT:
                case HLSL_TYPE_UINT:
                    write_sm4_unary_op(buffer, VKD3D_SM4_OP_INEG, &expr->node, arg1, 0);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP1_REINTERPRET:
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_ROUND:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_ROUND_NE, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_RSQ:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_RSQ, &expr->node, arg1, 0);
            break;

        case HLSL_OP1_SAT:
            assert(type_is_float(dst_type));
            /* Saturation is an instruction flag on mov rather than a separate opcode. */
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_MOV
                    | (VKD3D_SM4_INSTRUCTION_FLAG_SATURATE << VKD3D_SM4_INSTRUCTION_FLAGS_SHIFT),
                    &expr->node, arg1, 0);
            break;

        case HLSL_OP1_SIN:
            assert(type_is_float(dst_type));
            write_sm4_unary_op_with_two_destinations(buffer, VKD3D_SM4_OP_SINCOS, &expr->node, 0, arg1);
            break;

        case HLSL_OP1_SQRT:
            assert(type_is_float(dst_type));
            write_sm4_unary_op(buffer, VKD3D_SM4_OP_SQRT, &expr->node, arg1, 0);
            break;

        case HLSL_OP2_ADD:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ADD, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_INT:
                case HLSL_TYPE_UINT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IADD, &expr->node, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP2_BIT_AND:
            assert(type_is_integer(dst_type));
            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
            break;

        case HLSL_OP2_BIT_OR:
            assert(type_is_integer(dst_type));
            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
            break;

        case HLSL_OP2_BIT_XOR:
            assert(type_is_integer(dst_type));
            write_sm4_binary_op(buffer, VKD3D_SM4_OP_XOR, &expr->node, arg1, arg2);
            break;

        case HLSL_OP2_DIV:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_DIV, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_UINT:
                    /* udiv writes the quotient to its first destination and
                     * the remainder to its second. */
                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 0, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP2_DOT:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    switch (arg1->data_type->dimx)
                    {
                        case 4:
                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP4, &expr->node, arg1, arg2);
                            break;

                        case 3:
                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP3, &expr->node, arg1, arg2);
                            break;

                        case 2:
                            write_sm4_binary_op_dot(buffer, VKD3D_SM4_OP_DP2, &expr->node, arg1, arg2);
                            break;

                        case 1:
                        default:
                            vkd3d_unreachable();
                    }
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP2_EQUAL:
        {
            const struct hlsl_type *src_type = arg1->data_type;

            assert(dst_type->base_type == HLSL_TYPE_BOOL);

            switch (src_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_EQ, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_BOOL:
                case HLSL_TYPE_INT:
                case HLSL_TYPE_UINT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IEQ, &expr->node, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
                            debug_hlsl_type(ctx, src_type));
                    break;
            }
            break;
        }

        case HLSL_OP2_GEQUAL:
        {
            const struct hlsl_type *src_type = arg1->data_type;

            assert(dst_type->base_type == HLSL_TYPE_BOOL);

            switch (src_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_GE, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_INT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IGE, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_BOOL:
                case HLSL_TYPE_UINT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UGE, &expr->node, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
                            debug_hlsl_type(ctx, src_type));
                    break;
            }
            break;
        }

        case HLSL_OP2_LESS:
        {
            const struct hlsl_type *src_type = arg1->data_type;

            assert(dst_type->base_type == HLSL_TYPE_BOOL);

            switch (src_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_LT, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_INT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ILT, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_BOOL:
                case HLSL_TYPE_UINT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_ULT, &expr->node, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
                            debug_hlsl_type(ctx, src_type));
                    break;
            }
            break;
        }

        case HLSL_OP2_LOGIC_AND:
            assert(dst_type->base_type == HLSL_TYPE_BOOL);
            write_sm4_binary_op(buffer, VKD3D_SM4_OP_AND, &expr->node, arg1, arg2);
            break;

        case HLSL_OP2_LOGIC_OR:
            assert(dst_type->base_type == HLSL_TYPE_BOOL);
            write_sm4_binary_op(buffer, VKD3D_SM4_OP_OR, &expr->node, arg1, arg2);
            break;

        case HLSL_OP2_LSHIFT:
            assert(type_is_integer(dst_type));
            assert(dst_type->base_type != HLSL_TYPE_BOOL);
            write_sm4_binary_op(buffer, VKD3D_SM4_OP_ISHL, &expr->node, arg1, arg2);
            break;

        case HLSL_OP2_MAX:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MAX, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_INT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMAX, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_UINT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMAX, &expr->node, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP2_MIN:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MIN, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_INT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_IMIN, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_UINT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_UMIN, &expr->node, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP2_MOD:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_UINT:
                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_UDIV, &expr->node, 1, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP2_MUL:
            switch (dst_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_MUL, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_INT:
                case HLSL_TYPE_UINT:
                    /* Using IMUL instead of UMUL because we're taking the low
                     * bits, and the native compiler generates IMUL. */
                    write_sm4_binary_op_with_two_destinations(buffer, VKD3D_SM4_OP_IMUL, &expr->node, 1, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_string->buffer);
            }
            break;

        case HLSL_OP2_NEQUAL:
        {
            const struct hlsl_type *src_type = arg1->data_type;

            assert(dst_type->base_type == HLSL_TYPE_BOOL);

            switch (src_type->base_type)
            {
                case HLSL_TYPE_FLOAT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_NE, &expr->node, arg1, arg2);
                    break;

                case HLSL_TYPE_BOOL:
                case HLSL_TYPE_INT:
                case HLSL_TYPE_UINT:
                    write_sm4_binary_op(buffer, VKD3D_SM4_OP_INE, &expr->node, arg1, arg2);
                    break;

                default:
                    hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
                            debug_hlsl_type(ctx, src_type));
                    break;
            }
            break;
        }

        case HLSL_OP2_RSHIFT:
            assert(type_is_integer(dst_type));
            assert(dst_type->base_type != HLSL_TYPE_BOOL);
            write_sm4_binary_op(buffer, dst_type->base_type == HLSL_TYPE_INT ? VKD3D_SM4_OP_ISHR : VKD3D_SM4_OP_USHR,
                    &expr->node, arg1, arg2);
            break;

        default:
            hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
    }

    hlsl_release_string_buffer(ctx, dst_type_string);
}

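/* Emit structured control flow for an HLSL "if": an if_nz instruction
 * testing the (scalar) condition register, the "then" block, an optional
 * else instruction and block, and a closing endif. */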
static void write_sm4_if(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_if *iff)
{
    struct sm4_instruction instr =
    {
        .opcode = VKD3D_SM4_OP_IF | VKD3D_SM4_CONDITIONAL_NZ,
        .src_count = 1,
    };

    assert(iff->condition.node->data_type->dimx == 1);

    sm4_src_from_node(&instr.srcs[0], iff->condition.node, VKD3DSP_WRITEMASK_ALL);
    write_sm4_instruction(buffer, &instr);

    write_sm4_block(ctx, buffer, &iff->then_instrs);

    if (!list_empty(&iff->else_instrs.instrs))
    {
        instr.opcode = VKD3D_SM4_OP_ELSE;
        instr.src_count = 0;
        write_sm4_instruction(buffer, &instr);

        write_sm4_block(ctx, buffer, &iff->else_instrs);
    }

    instr.opcode = VKD3D_SM4_OP_ENDIF;
    instr.src_count = 0;
    write_sm4_instruction(buffer, &instr);
}

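/* Only "break" jumps are emitted here; "return" jumps are expected to have
 * been lowered away before bytecode generation. */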
static void write_sm4_jump(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_jump *jump)
{
    struct sm4_instruction instr = {0};

    switch (jump->type)
    {
        case HLSL_IR_JUMP_BREAK:
            instr.opcode = VKD3D_SM4_OP_BREAK;
            break;

        case HLSL_IR_JUMP_RETURN:
            vkd3d_unreachable();

        default:
            hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
            return;
    }

    write_sm4_instruction(buffer, &instr);
}

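/* A variable load becomes a mov from the dereferenced source register. */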
static void write_sm4_load(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_load *load)
{
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = VKD3D_SM4_OP_MOV;

    sm4_dst_from_node(&instr.dsts[0], &load->node);
    instr.dst_count = 1;

    sm4_src_from_deref(ctx, &instr.srcs[0], &load->src, load->node.data_type, instr.dsts[0].writemask);
    instr.src_count = 1;

    write_sm4_instruction(buffer, &instr);
}

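/* SM4 loop/endloop delimit an unconditional loop; the exit condition is
 * emitted as a break instruction inside the body. */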
static void write_sm4_loop(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_loop *loop)
{
    struct sm4_instruction instr =
    {
        .opcode = VKD3D_SM4_OP_LOOP,
    };

    write_sm4_instruction(buffer, &instr);

    write_sm4_block(ctx, buffer, &loop->body);

    instr.opcode = VKD3D_SM4_OP_ENDLOOP;
    write_sm4_instruction(buffer, &instr);
}

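/* Emit a gather4 instruction. Texel offsets are encoded as immediate
 * operands where possible; otherwise SM5's gather4_po takes the offset as
 * an extra source, while earlier profiles reject non-immediate offsets. */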
static void write_sm4_gather(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_type *resource_type, const struct hlsl_ir_node *dst,
        const struct hlsl_deref *resource, const struct hlsl_deref *sampler,
        const struct hlsl_ir_node *coords, unsigned int swizzle, const struct hlsl_ir_node *texel_offset)
{
    struct sm4_src_register *src;
    struct sm4_instruction instr;

    memset(&instr, 0, sizeof(instr));

    instr.opcode = VKD3D_SM4_OP_GATHER4;

    sm4_dst_from_node(&instr.dsts[0], dst);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[instr.src_count++], coords, VKD3DSP_WRITEMASK_ALL);

    if (texel_offset)
    {
        if (!encode_texel_offset_as_aoffimmi(&instr, texel_offset))
        {
            if (ctx->profile->major_version < 5)
            {
                hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
                        "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5.");
                return;
            }
            instr.opcode = VKD3D_SM5_OP_GATHER4_PO;
            sm4_src_from_node(&instr.srcs[instr.src_count++], texel_offset, VKD3DSP_WRITEMASK_ALL);
        }
    }

    sm4_src_from_deref(ctx, &instr.srcs[instr.src_count++], resource, resource_type, instr.dsts[0].writemask);

    src = &instr.srcs[instr.src_count++];
    sm4_src_from_deref(ctx, src, sampler, sampler->var->data_type, VKD3DSP_WRITEMASK_ALL);
    src->reg.dim = VKD3D_SM4_DIMENSION_VEC4;
    src->swizzle_type = VKD3D_SM4_SWIZZLE_SCALAR;
    src->swizzle = swizzle;

    write_sm4_instruction(buffer, &instr);
}

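/* Dispatch a resource load to the appropriate instruction. Gathers select
 * their channel through the sampler register's scalar swizzle. */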
static void write_sm4_resource_load(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_load *load)
{
    const struct hlsl_type *resource_type = load->resource.var->data_type;
    const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
    const struct hlsl_ir_node *coords = load->coords.node;

    if (resource_type->class != HLSL_CLASS_OBJECT)
    {
        assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT);
        hlsl_fixme(ctx, &load->node.loc, "Resource being a component of another variable.");
        return;
    }

    if (load->sampler.var)
    {
        const struct hlsl_type *sampler_type = load->sampler.var->data_type;

        if (sampler_type->class != HLSL_CLASS_OBJECT)
        {
            assert(sampler_type->class == HLSL_CLASS_ARRAY || sampler_type->class == HLSL_CLASS_STRUCT);
            hlsl_fixme(ctx, &load->node.loc, "Sampler being a component of another variable.");
            return;
        }
        assert(sampler_type->base_type == HLSL_TYPE_SAMPLER);
        assert(sampler_type->sampler_dim == HLSL_SAMPLER_DIM_GENERIC);

        if (!load->sampler.var->is_uniform)
        {
            hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable.");
            return;
        }
    }

    if (!load->resource.var->is_uniform)
    {
        hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable.");
        return;
    }

    switch (load->load_type)
    {
        case HLSL_RESOURCE_LOAD:
            write_sm4_ld(ctx, buffer, resource_type, &load->node, &load->resource,
                    coords, texel_offset);
            break;

        case HLSL_RESOURCE_SAMPLE:
            if (!load->sampler.var)
            {
                hlsl_fixme(ctx, &load->node.loc, "SM4 combined sample expression.");
                return;
            }
            write_sm4_sample(ctx, buffer, resource_type, &load->node,
                    &load->resource, &load->sampler, coords, texel_offset);
            break;

        case HLSL_RESOURCE_GATHER_RED:
            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
                    &load->sampler, coords, HLSL_SWIZZLE(X, X, X, X), texel_offset);
            break;

        case HLSL_RESOURCE_GATHER_GREEN:
            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
                    &load->sampler, coords, HLSL_SWIZZLE(Y, Y, Y, Y), texel_offset);
            break;

        case HLSL_RESOURCE_GATHER_BLUE:
            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
                    &load->sampler, coords, HLSL_SWIZZLE(Z, Z, Z, Z), texel_offset);
            break;

        case HLSL_RESOURCE_GATHER_ALPHA:
            write_sm4_gather(ctx, buffer, resource_type, &load->node, &load->resource,
                    &load->sampler, coords, HLSL_SWIZZLE(W, W, W, W), texel_offset);
            break;

        case HLSL_RESOURCE_SAMPLE_LOD:
            hlsl_fixme(ctx, &load->node.loc, "SM4 sample-LOD expression.");
            break;
    }
}

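/* Resource stores are currently emitted only as typed UAV stores. */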
static void write_sm4_resource_store(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_resource_store *store)
{
    const struct hlsl_type *resource_type = store->resource.var->data_type;

    if (resource_type->class != HLSL_CLASS_OBJECT)
    {
        assert(resource_type->class == HLSL_CLASS_ARRAY || resource_type->class == HLSL_CLASS_STRUCT);
        hlsl_fixme(ctx, &store->node.loc, "Resource being a component of another variable.");
        return;
    }

    if (!store->resource.var->is_uniform)
    {
        hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable.");
        return;
    }

    write_sm4_store_uav_typed(ctx, buffer, &store->resource, store->coords.node, store->value.node);
}

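/* A variable store becomes a mov whose destination writemask combines the
 * register's writemask with the writemask of the store itself. */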
static void write_sm4_store(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_store *store)
{
    const struct hlsl_ir_node *rhs = store->rhs.node;
    struct sm4_instruction instr;
    unsigned int writemask;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = VKD3D_SM4_OP_MOV;

    sm4_register_from_deref(ctx, &instr.dsts[0].reg, &writemask, NULL, &store->lhs, rhs->data_type);
    instr.dsts[0].writemask = hlsl_combine_writemasks(writemask, store->writemask);
    instr.dst_count = 1;

    sm4_src_from_node(&instr.srcs[0], rhs, instr.dsts[0].writemask);
    instr.src_count = 1;

    write_sm4_instruction(buffer, &instr);
}

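/* A swizzle becomes a mov whose source swizzle is the node's swizzle
 * composed with the source register's own swizzle, remapped onto the
 * destination writemask. */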
static void write_sm4_swizzle(struct hlsl_ctx *ctx,
        struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_swizzle *swizzle)
{
    struct sm4_instruction instr;
    unsigned int writemask;

    memset(&instr, 0, sizeof(instr));
    instr.opcode = VKD3D_SM4_OP_MOV;

    sm4_dst_from_node(&instr.dsts[0], &swizzle->node);
    instr.dst_count = 1;

    sm4_register_from_node(&instr.srcs[0].reg, &writemask, &instr.srcs[0].swizzle_type, swizzle->val.node);
    instr.srcs[0].swizzle = hlsl_map_swizzle(hlsl_combine_swizzles(hlsl_swizzle_from_writemask(writemask),
            swizzle->swizzle, swizzle->node.data_type->dimx), instr.dsts[0].writemask);
    instr.src_count = 1;

    write_sm4_instruction(buffer, &instr);
}

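/* Emit an instruction block. Only scalar and vector typed instructions are
 * expected here; matrix and object operations should have been lowered by
 * earlier passes. */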
static void write_sm4_block(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_block *block)
{
    const struct hlsl_ir_node *instr;

    LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
    {
        if (instr->data_type)
        {
            if (instr->data_type->class == HLSL_CLASS_MATRIX)
            {
                hlsl_fixme(ctx, &instr->loc, "Matrix operations need to be lowered.");
                break;
            }
            else if (instr->data_type->class == HLSL_CLASS_OBJECT)
            {
                hlsl_fixme(ctx, &instr->loc, "Object copy.");
                break;
            }

            assert(instr->data_type->class == HLSL_CLASS_SCALAR || instr->data_type->class == HLSL_CLASS_VECTOR);
        }

        switch (instr->type)
        {
            case HLSL_IR_CALL:
                vkd3d_unreachable();

            case HLSL_IR_CONSTANT:
                write_sm4_constant(ctx, buffer, hlsl_ir_constant(instr));
                break;

            case HLSL_IR_EXPR:
                write_sm4_expr(ctx, buffer, hlsl_ir_expr(instr));
                break;

            case HLSL_IR_IF:
                write_sm4_if(ctx, buffer, hlsl_ir_if(instr));
                break;

            case HLSL_IR_JUMP:
                write_sm4_jump(ctx, buffer, hlsl_ir_jump(instr));
                break;

            case HLSL_IR_LOAD:
                write_sm4_load(ctx, buffer, hlsl_ir_load(instr));
                break;

            case HLSL_IR_RESOURCE_LOAD:
                write_sm4_resource_load(ctx, buffer, hlsl_ir_resource_load(instr));
                break;

            case HLSL_IR_RESOURCE_STORE:
                write_sm4_resource_store(ctx, buffer, hlsl_ir_resource_store(instr));
                break;

            case HLSL_IR_LOOP:
                write_sm4_loop(ctx, buffer, hlsl_ir_loop(instr));
                break;

            case HLSL_IR_STORE:
                write_sm4_store(ctx, buffer, hlsl_ir_store(instr));
                break;

            case HLSL_IR_SWIZZLE:
                write_sm4_swizzle(ctx, buffer, hlsl_ir_swizzle(instr));
                break;

            default:
                hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
        }
    }
}

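/* Write the SHDR chunk: the version token, a token count patched in once the
 * chunk is complete, buffer/resource/semantic declarations, and the
 * translated body of the entry point followed by ret. */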
static void write_sm4_shdr(struct hlsl_ctx *ctx,
        const struct hlsl_ir_function_decl *entry_func, struct dxbc_writer *dxbc)
{
    const struct hlsl_profile_info *profile = ctx->profile;
    const struct hlsl_ir_var **extern_resources;
    struct vkd3d_bytecode_buffer buffer = {0};
    unsigned int extern_resources_count, i;
    const struct hlsl_buffer *cbuffer;
    const struct hlsl_ir_var *var;
    size_t token_count_position;

    static const uint16_t shader_types[VKD3D_SHADER_TYPE_COUNT] =
    {
        VKD3D_SM4_PS,
        VKD3D_SM4_VS,
        VKD3D_SM4_GS,
        VKD3D_SM5_HS,
        VKD3D_SM5_DS,
        VKD3D_SM5_CS,
        0, /* EFFECT */
        0, /* TEXTURE */
        VKD3D_SM4_LIB,
    };

    extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);

    put_u32(&buffer, vkd3d_make_u32((profile->major_version << 4) | profile->minor_version,
            shader_types[profile->type]));
    token_count_position = put_u32(&buffer, 0);

    LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
    {
        if (cbuffer->reg.allocated)
            write_sm4_dcl_constant_buffer(&buffer, cbuffer);
    }

    for (i = 0; i < extern_resources_count; ++i)
    {
        var = extern_resources[i];

        if (var->data_type->base_type == HLSL_TYPE_SAMPLER)
            write_sm4_dcl_sampler(&buffer, var);
        else if (var->data_type->base_type == HLSL_TYPE_TEXTURE || var->data_type->base_type == HLSL_TYPE_UAV)
            write_sm4_dcl_texture(&buffer, var);
    }

    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        if ((var->is_input_semantic && var->last_read) || (var->is_output_semantic && var->first_write))
            write_sm4_dcl_semantic(ctx, &buffer, var);
    }

    if (profile->type == VKD3D_SHADER_TYPE_COMPUTE)
        write_sm4_dcl_thread_group(&buffer, ctx->thread_count);

    if (ctx->temp_count)
        write_sm4_dcl_temps(&buffer, ctx->temp_count);

    write_sm4_block(ctx, &buffer, &entry_func->body);

    write_sm4_ret(&buffer);

    set_u32(&buffer, token_count_position, bytecode_get_size(&buffer) / sizeof(uint32_t));

    dxbc_writer_add_section(dxbc, TAG_SHDR, buffer.data, buffer.size);

    vkd3d_free(extern_resources);
}

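/* Write the complete DXBC blob: input/output signatures, resource
 * definitions (RDEF), and the shader code itself (SHDR). */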
int hlsl_sm4_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
{
    struct dxbc_writer dxbc;
    size_t i;
    int ret;

    dxbc_writer_init(&dxbc);

    write_sm4_signature(ctx, &dxbc, false);
    write_sm4_signature(ctx, &dxbc, true);
    write_sm4_rdef(ctx, &dxbc);
    write_sm4_shdr(ctx, entry_func, &dxbc);

    if (!(ret = ctx->result))
        ret = dxbc_writer_write(&dxbc, out);
    for (i = 0; i < dxbc.section_count; ++i)
        vkd3d_shader_free_shader_code(&dxbc.sections[i].data);
    return ret;
}