Francisco Casas 96f66aa4f8 vkd3d-shader/d3dbc: Use the bind count instead of the allocation size in d3dbc.c.
This should have no effect, since in SM1 the allocation size is the
same as the bind count because there are no texture registers.
It is just done for consistency.
2023-08-15 21:51:33 +02:00

2477 lines
89 KiB
C

/*
* d3dbc (Direct3D shader models 1-3 bytecode) support
*
* Copyright 2002-2003 Jason Edmeades
* Copyright 2002-2003 Raphael Junqueira
* Copyright 2004 Christian Costa
* Copyright 2005 Oliver Stieber
* Copyright 2006 Ivan Gyurdiev
* Copyright 2007-2008 Stefan Dösinger for CodeWeavers
* Copyright 2009, 2021 Henri Verbeet for CodeWeavers
* Copyright 2019-2020 Zebediah Figura for CodeWeavers
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
*/
#include "hlsl.h"
/* Shader type tags. NOTE(review): presumably compared against the upper 16
 * bits of the version token; the code doing so is not in this part of the
 * file. */
#define VKD3D_SM1_VS 0xfffeu
#define VKD3D_SM1_PS 0xffffu
/* Bit-field layout of the bytecode tokens. Each *_MASK below is a field-width
 * constant moved into place by the matching *_SHIFT, so a field is extracted
 * with (token & MASK) >> SHIFT. */
/* Fields of the DCL usage token; decoded in shader_sm1_read_semantic(). */
#define VKD3D_SM1_DCL_USAGE_SHIFT 0u
#define VKD3D_SM1_DCL_USAGE_MASK (0xfu << VKD3D_SM1_DCL_USAGE_SHIFT)
#define VKD3D_SM1_DCL_USAGE_INDEX_SHIFT 16u
#define VKD3D_SM1_DCL_USAGE_INDEX_MASK (0xfu << VKD3D_SM1_DCL_USAGE_INDEX_SHIFT)
#define VKD3D_SM1_RESOURCE_TYPE_SHIFT 27u
#define VKD3D_SM1_RESOURCE_TYPE_MASK (0xfu << VKD3D_SM1_RESOURCE_TYPE_SHIFT)
/* Fields of the instruction (opcode) token. The length field is what
 * shader_sm1_skip_opcode() uses for version 2.0+ shaders. */
#define VKD3D_SM1_OPCODE_MASK 0x0000ffffu
#define VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT 16u
#define VKD3D_SM1_INSTRUCTION_FLAGS_MASK (0xffu << VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT)
#define VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT 24u
#define VKD3D_SM1_INSTRUCTION_LENGTH_MASK (0xfu << VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT)
#define VKD3D_SM1_COISSUE (0x1u << 30u)
#define VKD3D_SM1_COMMENT_SIZE_SHIFT 16u
#define VKD3D_SM1_COMMENT_SIZE_MASK (0x7fffu << VKD3D_SM1_COMMENT_SIZE_SHIFT)
#define VKD3D_SM1_INSTRUCTION_PREDICATED (0x1u << 28u)
/* Bit 31. The address token synthesized by shader_sm1_read_param() for
 * version 1.x shaders has this bit set. */
#define VKD3D_SM1_INSTRUCTION_PARAMETER (0x1u << 31u)
/* Fields shared by source and destination parameter tokens. The register
 * number occupies the low 11 bits. The register type is split in two: bits
 * 28-30 supply its low three bits and bits 11-12 supply bits 3-4 (MASK2 is
 * deliberately 0x18 shifted by 8, so shifting right by SHIFT2 leaves the
 * value already positioned at bits 3-4). */
#define VKD3D_SM1_REGISTER_NUMBER_MASK 0x000007ffu
#define VKD3D_SM1_REGISTER_TYPE_SHIFT 28u
#define VKD3D_SM1_REGISTER_TYPE_MASK (0x7u << VKD3D_SM1_REGISTER_TYPE_SHIFT)
#define VKD3D_SM1_REGISTER_TYPE_SHIFT2 8u
#define VKD3D_SM1_REGISTER_TYPE_MASK2 (0x18u << VKD3D_SM1_REGISTER_TYPE_SHIFT2)
/* Single-bit addressing mode (bit 13); tested by has_relative_address(). */
#define VKD3D_SM1_ADDRESS_MODE_SHIFT 13u
#define VKD3D_SM1_ADDRESS_MODE_MASK (0x1u << VKD3D_SM1_ADDRESS_MODE_SHIFT)
/* Destination-only fields; decoded in shader_sm1_parse_dst_param(). */
#define VKD3D_SM1_DST_MODIFIER_SHIFT 20u
#define VKD3D_SM1_DST_MODIFIER_MASK (0xfu << VKD3D_SM1_DST_MODIFIER_SHIFT)
#define VKD3D_SM1_DSTSHIFT_SHIFT 24u
#define VKD3D_SM1_DSTSHIFT_MASK (0xfu << VKD3D_SM1_DSTSHIFT_SHIFT)
#define VKD3D_SM1_WRITEMASK_SHIFT 16u
#define VKD3D_SM1_WRITEMASK_MASK (0xfu << VKD3D_SM1_WRITEMASK_SHIFT)
/* Source-only fields; decoded in shader_sm1_parse_src_param(). A swizzle is
 * four 2-bit component selectors; the default value selects component i for
 * slot i (0, 1, 2, 3). */
#define VKD3D_SM1_SWIZZLE_SHIFT 16u
#define VKD3D_SM1_SWIZZLE_MASK (0xffu << VKD3D_SM1_SWIZZLE_SHIFT)
#define VKD3D_SM1_SWIZZLE_DEFAULT (0u | (1u << 2) | (2u << 4) | (3u << 6))
#define VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(idx) (2u * (idx))
#define VKD3D_SM1_SWIZZLE_COMPONENT_MASK(idx) (0x3u << VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(idx))
#define VKD3D_SM1_SRC_MODIFIER_SHIFT 24u
#define VKD3D_SM1_SRC_MODIFIER_MASK (0xfu << VKD3D_SM1_SRC_MODIFIER_SHIFT)
/* Same numeric value as VKD3D_SM1_OP_END. */
#define VKD3D_SM1_END 0x0000ffffu
/* Major version is bits 8-15 of the version token, minor version bits 0-7. */
#define VKD3D_SM1_VERSION_MAJOR(version) (((version) >> 8u) & 0xffu)
#define VKD3D_SM1_VERSION_MINOR(version) (((version) >> 0u) & 0xffu)
/* Values of the one-bit addressing-mode field of a parameter token
 * (VKD3D_SM1_ADDRESS_MODE_MASK). */
enum vkd3d_sm1_address_mode_type
{
    VKD3D_SM1_ADDRESS_MODE_ABSOLUTE = 0,
    VKD3D_SM1_ADDRESS_MODE_RELATIVE = 1,
};
/* Resource type codes carried in a DCL usage token. The values double as
 * indices into resource_type_table[], so they must stay dense and in this
 * order. */
enum vkd3d_sm1_resource_type
{
    VKD3D_SM1_RESOURCE_UNKNOWN      = 0,
    VKD3D_SM1_RESOURCE_TEXTURE_1D   = 1,
    VKD3D_SM1_RESOURCE_TEXTURE_2D   = 2,
    VKD3D_SM1_RESOURCE_TEXTURE_CUBE = 3,
    VKD3D_SM1_RESOURCE_TEXTURE_3D   = 4,
};
/* Opcode values as they appear in the low 16 bits of an instruction token
 * (VKD3D_SM1_OPCODE_MASK). */
enum vkd3d_sm1_opcode
{
    VKD3D_SM1_OP_NOP          = 0x00,
    VKD3D_SM1_OP_MOV          = 0x01,
    VKD3D_SM1_OP_ADD          = 0x02,
    VKD3D_SM1_OP_SUB          = 0x03,
    VKD3D_SM1_OP_MAD          = 0x04,
    VKD3D_SM1_OP_MUL          = 0x05,
    VKD3D_SM1_OP_RCP          = 0x06,
    VKD3D_SM1_OP_RSQ          = 0x07,
    VKD3D_SM1_OP_DP3          = 0x08,
    VKD3D_SM1_OP_DP4          = 0x09,
    VKD3D_SM1_OP_MIN          = 0x0a,
    VKD3D_SM1_OP_MAX          = 0x0b,
    VKD3D_SM1_OP_SLT          = 0x0c,
    VKD3D_SM1_OP_SGE          = 0x0d,
    VKD3D_SM1_OP_EXP          = 0x0e,
    VKD3D_SM1_OP_LOG          = 0x0f,
    VKD3D_SM1_OP_LIT          = 0x10,
    VKD3D_SM1_OP_DST          = 0x11,
    VKD3D_SM1_OP_LRP          = 0x12,
    VKD3D_SM1_OP_FRC          = 0x13,
    VKD3D_SM1_OP_M4x4         = 0x14,
    VKD3D_SM1_OP_M4x3         = 0x15,
    VKD3D_SM1_OP_M3x4         = 0x16,
    VKD3D_SM1_OP_M3x3         = 0x17,
    VKD3D_SM1_OP_M3x2         = 0x18,
    VKD3D_SM1_OP_CALL         = 0x19,
    VKD3D_SM1_OP_CALLNZ       = 0x1a,
    VKD3D_SM1_OP_LOOP         = 0x1b,
    VKD3D_SM1_OP_RET          = 0x1c,
    VKD3D_SM1_OP_ENDLOOP      = 0x1d,
    VKD3D_SM1_OP_LABEL        = 0x1e,
    VKD3D_SM1_OP_DCL          = 0x1f,
    VKD3D_SM1_OP_POW          = 0x20,
    VKD3D_SM1_OP_CRS          = 0x21,
    VKD3D_SM1_OP_SGN          = 0x22,
    VKD3D_SM1_OP_ABS          = 0x23,
    VKD3D_SM1_OP_NRM          = 0x24,
    VKD3D_SM1_OP_SINCOS       = 0x25,
    VKD3D_SM1_OP_REP          = 0x26,
    VKD3D_SM1_OP_ENDREP       = 0x27,
    VKD3D_SM1_OP_IF           = 0x28,
    VKD3D_SM1_OP_IFC          = 0x29,
    VKD3D_SM1_OP_ELSE         = 0x2a,
    VKD3D_SM1_OP_ENDIF        = 0x2b,
    VKD3D_SM1_OP_BREAK        = 0x2c,
    VKD3D_SM1_OP_BREAKC       = 0x2d,
    VKD3D_SM1_OP_MOVA         = 0x2e,
    VKD3D_SM1_OP_DEFB         = 0x2f,
    VKD3D_SM1_OP_DEFI         = 0x30,

    /* The numbering resumes at 0x40; 0x31-0x3f are not assigned here. */
    VKD3D_SM1_OP_TEXCOORD     = 0x40,
    VKD3D_SM1_OP_TEXKILL      = 0x41,
    VKD3D_SM1_OP_TEX          = 0x42,
    VKD3D_SM1_OP_TEXBEM       = 0x43,
    VKD3D_SM1_OP_TEXBEML      = 0x44,
    VKD3D_SM1_OP_TEXREG2AR    = 0x45,
    VKD3D_SM1_OP_TEXREG2GB    = 0x46,
    VKD3D_SM1_OP_TEXM3x2PAD   = 0x47,
    VKD3D_SM1_OP_TEXM3x2TEX   = 0x48,
    VKD3D_SM1_OP_TEXM3x3PAD   = 0x49,
    VKD3D_SM1_OP_TEXM3x3TEX   = 0x4a,
    VKD3D_SM1_OP_TEXM3x3DIFF  = 0x4b,
    VKD3D_SM1_OP_TEXM3x3SPEC  = 0x4c,
    VKD3D_SM1_OP_TEXM3x3VSPEC = 0x4d,
    VKD3D_SM1_OP_EXPP         = 0x4e,
    VKD3D_SM1_OP_LOGP         = 0x4f,
    VKD3D_SM1_OP_CND          = 0x50,
    VKD3D_SM1_OP_DEF          = 0x51,
    VKD3D_SM1_OP_TEXREG2RGB   = 0x52,
    VKD3D_SM1_OP_TEXDP3TEX    = 0x53,
    VKD3D_SM1_OP_TEXM3x2DEPTH = 0x54,
    VKD3D_SM1_OP_TEXDP3       = 0x55,
    VKD3D_SM1_OP_TEXM3x3      = 0x56,
    VKD3D_SM1_OP_TEXDEPTH     = 0x57,
    VKD3D_SM1_OP_CMP          = 0x58,
    VKD3D_SM1_OP_BEM          = 0x59,
    VKD3D_SM1_OP_DP2ADD       = 0x5a,
    VKD3D_SM1_OP_DSX          = 0x5b,
    VKD3D_SM1_OP_DSY          = 0x5c,
    VKD3D_SM1_OP_TEXLDD       = 0x5d,
    VKD3D_SM1_OP_SETP         = 0x5e,
    VKD3D_SM1_OP_TEXLDL       = 0x5f,
    VKD3D_SM1_OP_BREAKP       = 0x60,

    /* Values at the very top of the 16-bit opcode range. */
    VKD3D_SM1_OP_PHASE        = 0xfffd,
    VKD3D_SM1_OP_COMMENT      = 0xfffe,
    VKD3D_SM1_OP_END          = 0xffff,
};
/* One row of an opcode table: ties an SM1 opcode to the vkd3d opcode it is
 * translated to, together with its operand counts and the shader versions
 * for which the row applies. The tables initialise rows positionally, so the
 * field order here must not change. */
struct vkd3d_sm1_opcode_info
{
enum vkd3d_sm1_opcode sm1_opcode;
/* Number of destination and source parameters. shader_sm1_skip_opcode()
 * adds the two together to step over a version 1.x instruction. */
unsigned int dst_count;
unsigned int src_count;
/* VKD3DSIH_INVALID marks the terminating row of a table. */
enum vkd3d_shader_opcode vkd3d_opcode;
/* Inclusive version bounds, checked by shader_sm1_get_opcode_info(). A
 * max_version.major of 0 is treated as "no upper bound". */
struct
{
unsigned int major, minor;
} min_version, max_version;
};
/* Parser state for one d3dbc shader. The generic parser is embedded as 'p';
 * vkd3d_shader_sm1_parser() recovers this structure from a pointer to it. */
struct vkd3d_shader_sm1_parser
{
/* Table scanned by shader_sm1_get_opcode_info(); ends with a
 * VKD3DSIH_INVALID row. */
const struct vkd3d_sm1_opcode_info *opcode_table;
/* Token stream pointers. 'end' is the bound the read helpers check before
 * consuming a token. */
const uint32_t *start, *end, *ptr;
/* Set by the read helpers when they report an error (unexpected end of
 * input, allocation failure). */
bool abort;
struct vkd3d_shader_parser p;
#define MAX_CONSTANT_COUNT 8192
/* One bitmap of MAX_CONSTANT_COUNT bits per constant register set;
 * record_constant_register() sets a bit when called with from_def set. */
uint32_t constant_def_mask[3][MAX_CONSTANT_COUNT / 32];
};
/* Opcode table for vertex shaders.
 *
 * Row layout: {SM1 opcode, dst count, src count, vkd3d opcode,
 * {min major, min minor}, {max major, max minor}}. Rows that omit the version
 * bounds get zeroes, which shader_sm1_get_opcode_info() treats as "any
 * version" (minimum 0.0, and a zero maximum major means no upper bound).
 *
 * This table is not order or position dependent, except that the
 * VKD3DSIH_INVALID row must stay last: the lookup stops there. */
static const struct vkd3d_sm1_opcode_info vs_opcode_table[] =
{
/* Arithmetic */
{VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP},
{VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV},
{VKD3D_SM1_OP_MOVA, 1, 1, VKD3DSIH_MOVA, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD},
{VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB},
{VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD},
{VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL},
{VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP},
{VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ},
{VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3},
{VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4},
{VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN},
{VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX},
{VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT},
{VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE},
{VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS},
{VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP},
{VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG},
{VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP},
{VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP},
{VKD3D_SM1_OP_LIT, 1, 1, VKD3DSIH_LIT},
{VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST},
{VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP},
{VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC},
{VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW},
{VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS},
/* SGN and SINCOS have two rows each: the operand count differs between
 * versions 2.0-2.1 and 3.0+. */
{VKD3D_SM1_OP_SGN, 1, 3, VKD3DSIH_SGN, {2, 0}, { 2, 1}},
{VKD3D_SM1_OP_SGN, 1, 1, VKD3DSIH_SGN, {3, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM,},
{VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}},
{VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}},
/* Matrix */
{VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4},
{VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3},
{VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4},
{VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3},
{VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2},
/* Declarations */
{VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL},
/* Constant definitions */
{VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF},
{VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB},
{VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI},
/* Control flow */
{VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP},
{VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP},
{VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}},
/* Terminator: shader_sm1_get_opcode_info() returns NULL on reaching it. */
{0, 0, 0, VKD3DSIH_INVALID},
};
/* Opcode table for pixel shaders.
 *
 * Row layout: {SM1 opcode, dst count, src count, vkd3d opcode,
 * {min major, min minor}, {max major, max minor}}. Rows that omit the version
 * bounds get zeroes, which shader_sm1_get_opcode_info() treats as "any
 * version" (minimum 0.0, and a zero maximum major means no upper bound).
 * Several opcodes appear more than once with different version ranges because
 * their operand counts changed between versions. The VKD3DSIH_INVALID row
 * must stay last: the lookup stops there. */
static const struct vkd3d_sm1_opcode_info ps_opcode_table[] =
{
/* Arithmetic */
{VKD3D_SM1_OP_NOP, 0, 0, VKD3DSIH_NOP},
{VKD3D_SM1_OP_MOV, 1, 1, VKD3DSIH_MOV},
{VKD3D_SM1_OP_ADD, 1, 2, VKD3DSIH_ADD},
{VKD3D_SM1_OP_SUB, 1, 2, VKD3DSIH_SUB},
{VKD3D_SM1_OP_MAD, 1, 3, VKD3DSIH_MAD},
{VKD3D_SM1_OP_MUL, 1, 2, VKD3DSIH_MUL},
{VKD3D_SM1_OP_RCP, 1, 1, VKD3DSIH_RCP},
{VKD3D_SM1_OP_RSQ, 1, 1, VKD3DSIH_RSQ},
{VKD3D_SM1_OP_DP3, 1, 2, VKD3DSIH_DP3},
{VKD3D_SM1_OP_DP4, 1, 2, VKD3DSIH_DP4},
{VKD3D_SM1_OP_MIN, 1, 2, VKD3DSIH_MIN},
{VKD3D_SM1_OP_MAX, 1, 2, VKD3DSIH_MAX},
{VKD3D_SM1_OP_SLT, 1, 2, VKD3DSIH_SLT},
{VKD3D_SM1_OP_SGE, 1, 2, VKD3DSIH_SGE},
{VKD3D_SM1_OP_ABS, 1, 1, VKD3DSIH_ABS},
{VKD3D_SM1_OP_EXP, 1, 1, VKD3DSIH_EXP},
{VKD3D_SM1_OP_LOG, 1, 1, VKD3DSIH_LOG},
{VKD3D_SM1_OP_EXPP, 1, 1, VKD3DSIH_EXPP},
{VKD3D_SM1_OP_LOGP, 1, 1, VKD3DSIH_LOGP},
{VKD3D_SM1_OP_DST, 1, 2, VKD3DSIH_DST},
{VKD3D_SM1_OP_LRP, 1, 3, VKD3DSIH_LRP},
{VKD3D_SM1_OP_FRC, 1, 1, VKD3DSIH_FRC},
{VKD3D_SM1_OP_CND, 1, 3, VKD3DSIH_CND, {1, 0}, { 1, 4}},
{VKD3D_SM1_OP_CMP, 1, 3, VKD3DSIH_CMP, {1, 2}, { 3, 0}},
{VKD3D_SM1_OP_POW, 1, 2, VKD3DSIH_POW},
{VKD3D_SM1_OP_CRS, 1, 2, VKD3DSIH_CRS},
{VKD3D_SM1_OP_NRM, 1, 1, VKD3DSIH_NRM},
{VKD3D_SM1_OP_SINCOS, 1, 3, VKD3DSIH_SINCOS, {2, 0}, { 2, 1}},
{VKD3D_SM1_OP_SINCOS, 1, 1, VKD3DSIH_SINCOS, {3, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_DP2ADD, 1, 3, VKD3DSIH_DP2ADD, {2, 0}, {~0u, ~0u}},
/* Matrix */
{VKD3D_SM1_OP_M4x4, 1, 2, VKD3DSIH_M4x4},
{VKD3D_SM1_OP_M4x3, 1, 2, VKD3DSIH_M4x3},
{VKD3D_SM1_OP_M3x4, 1, 2, VKD3DSIH_M3x4},
{VKD3D_SM1_OP_M3x3, 1, 2, VKD3DSIH_M3x3},
{VKD3D_SM1_OP_M3x2, 1, 2, VKD3DSIH_M3x2},
/* Declarations */
{VKD3D_SM1_OP_DCL, 0, 2, VKD3DSIH_DCL},
/* Constant definitions */
{VKD3D_SM1_OP_DEF, 1, 1, VKD3DSIH_DEF},
{VKD3D_SM1_OP_DEFB, 1, 1, VKD3DSIH_DEFB},
{VKD3D_SM1_OP_DEFI, 1, 1, VKD3DSIH_DEFI},
/* Control flow */
{VKD3D_SM1_OP_REP, 0, 1, VKD3DSIH_REP, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_ENDREP, 0, 0, VKD3DSIH_ENDREP, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_IF, 0, 1, VKD3DSIH_IF, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_IFC, 0, 2, VKD3DSIH_IFC, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_ELSE, 0, 0, VKD3DSIH_ELSE, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_ENDIF, 0, 0, VKD3DSIH_ENDIF, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_BREAK, 0, 0, VKD3DSIH_BREAK, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_BREAKC, 0, 2, VKD3DSIH_BREAKC, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_BREAKP, 0, 1, VKD3DSIH_BREAKP},
{VKD3D_SM1_OP_CALL, 0, 1, VKD3DSIH_CALL, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_CALLNZ, 0, 2, VKD3DSIH_CALLNZ, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_LOOP, 0, 2, VKD3DSIH_LOOP, {3, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_RET, 0, 0, VKD3DSIH_RET, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_ENDLOOP, 0, 0, VKD3DSIH_ENDLOOP, {3, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_LABEL, 0, 1, VKD3DSIH_LABEL, {2, 1}, {~0u, ~0u}},
/* Texture */
{VKD3D_SM1_OP_TEXCOORD, 1, 0, VKD3DSIH_TEXCOORD, {0, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXCOORD, 1, 1, VKD3DSIH_TEXCOORD, {1 ,4}, { 1, 4}},
{VKD3D_SM1_OP_TEXKILL, 1, 0, VKD3DSIH_TEXKILL, {1 ,0}, { 3, 0}},
{VKD3D_SM1_OP_TEX, 1, 0, VKD3DSIH_TEX, {0, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEX, 1, 1, VKD3DSIH_TEX, {1, 4}, { 1, 4}},
{VKD3D_SM1_OP_TEX, 1, 2, VKD3DSIH_TEX, {2, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_TEXBEM, 1, 1, VKD3DSIH_TEXBEM, {0, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXBEML, 1, 1, VKD3DSIH_TEXBEML, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXREG2AR, 1, 1, VKD3DSIH_TEXREG2AR, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXREG2GB, 1, 1, VKD3DSIH_TEXREG2GB, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXREG2RGB, 1, 1, VKD3DSIH_TEXREG2RGB, {1, 2}, { 1, 3}},
{VKD3D_SM1_OP_TEXM3x2PAD, 1, 1, VKD3DSIH_TEXM3x2PAD, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXM3x2TEX, 1, 1, VKD3DSIH_TEXM3x2TEX, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXM3x3PAD, 1, 1, VKD3DSIH_TEXM3x3PAD, {1, 0}, { 1, 3}},
/* NOTE(review): a {0, 0} maximum means "no upper bound" to the lookup, so
 * this row matches every shader version - confirm that is intended. */
{VKD3D_SM1_OP_TEXM3x3DIFF, 1, 1, VKD3DSIH_TEXM3x3DIFF, {0, 0}, { 0, 0}},
{VKD3D_SM1_OP_TEXM3x3SPEC, 1, 2, VKD3DSIH_TEXM3x3SPEC, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXM3x3VSPEC, 1, 1, VKD3DSIH_TEXM3x3VSPEC, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXM3x3TEX, 1, 1, VKD3DSIH_TEXM3x3TEX, {1, 0}, { 1, 3}},
{VKD3D_SM1_OP_TEXDP3TEX, 1, 1, VKD3DSIH_TEXDP3TEX, {1, 2}, { 1, 3}},
{VKD3D_SM1_OP_TEXM3x2DEPTH, 1, 1, VKD3DSIH_TEXM3x2DEPTH, {1, 3}, { 1, 3}},
{VKD3D_SM1_OP_TEXDP3, 1, 1, VKD3DSIH_TEXDP3, {1, 2}, { 1, 3}},
{VKD3D_SM1_OP_TEXM3x3, 1, 1, VKD3DSIH_TEXM3x3, {1, 2}, { 1, 3}},
{VKD3D_SM1_OP_TEXDEPTH, 1, 0, VKD3DSIH_TEXDEPTH, {1, 4}, { 1, 4}},
{VKD3D_SM1_OP_BEM, 1, 2, VKD3DSIH_BEM, {1, 4}, { 1, 4}},
{VKD3D_SM1_OP_DSX, 1, 1, VKD3DSIH_DSX, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_DSY, 1, 1, VKD3DSIH_DSY, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_TEXLDD, 1, 4, VKD3DSIH_TEXLDD, {2, 1}, {~0u, ~0u}},
{VKD3D_SM1_OP_SETP, 1, 2, VKD3DSIH_SETP},
{VKD3D_SM1_OP_TEXLDL, 1, 2, VKD3DSIH_TEXLDL, {3, 0}, {~0u, ~0u}},
{VKD3D_SM1_OP_PHASE, 0, 0, VKD3DSIH_PHASE},
/* Terminator: shader_sm1_get_opcode_info() returns NULL on reaching it. */
{0, 0, 0, VKD3DSIH_INVALID},
};
static const enum vkd3d_shader_resource_type resource_type_table[] =
{
/* VKD3D_SM1_RESOURCE_UNKNOWN */ VKD3D_SHADER_RESOURCE_NONE,
/* VKD3D_SM1_RESOURCE_TEXTURE_1D */ VKD3D_SHADER_RESOURCE_TEXTURE_1D,
/* VKD3D_SM1_RESOURCE_TEXTURE_2D */ VKD3D_SHADER_RESOURCE_TEXTURE_2D,
/* VKD3D_SM1_RESOURCE_TEXTURE_CUBE */ VKD3D_SHADER_RESOURCE_TEXTURE_CUBE,
/* VKD3D_SM1_RESOURCE_TEXTURE_3D */ VKD3D_SHADER_RESOURCE_TEXTURE_3D,
};
/* Recover the d3dbc parser that embeds the given generic parser as its 'p'
 * member. */
static struct vkd3d_shader_sm1_parser *vkd3d_shader_sm1_parser(struct vkd3d_shader_parser *parser)
{
    struct vkd3d_shader_sm1_parser *sm1 = CONTAINING_RECORD(parser, struct vkd3d_shader_sm1_parser, p);

    return sm1;
}
/* Return the token *ptr points at and advance *ptr to the next token. No
 * bounds checking is done here; callers check against the end of the stream
 * first. */
static uint32_t read_u32(const uint32_t **ptr)
{
    const uint32_t *cursor = *ptr;
    uint32_t token = *cursor;

    *ptr = cursor + 1;
    return token;
}
/* Return true if shader version *v is at least major.minor. */
static bool shader_ver_ge(const struct vkd3d_shader_version *v, unsigned int major, unsigned int minor)
{
    /* The major number decides unless it ties. */
    if (v->major != major)
        return v->major > major;

    return v->minor >= minor;
}
/* Return true if shader version *v is at most major.minor. */
static bool shader_ver_le(const struct vkd3d_shader_version *v, unsigned int major, unsigned int minor)
{
    /* The major number decides unless it ties. */
    if (v->major != major)
        return v->major < major;

    return v->minor <= minor;
}
/* Return true if the addressing-mode bit of a parameter token selects
 * relative addressing. */
static bool has_relative_address(uint32_t param)
{
    return ((param & VKD3D_SM1_ADDRESS_MODE_MASK) >> VKD3D_SM1_ADDRESS_MODE_SHIFT)
            == VKD3D_SM1_ADDRESS_MODE_RELATIVE;
}
static const struct vkd3d_sm1_opcode_info *shader_sm1_get_opcode_info(
const struct vkd3d_shader_sm1_parser *sm1, enum vkd3d_sm1_opcode opcode)
{
const struct vkd3d_sm1_opcode_info *info;
unsigned int i = 0;
for (;;)
{
info = &sm1->opcode_table[i++];
if (info->vkd3d_opcode == VKD3DSIH_INVALID)
return NULL;
if (opcode == info->sm1_opcode
&& shader_ver_ge(&sm1->p.shader_version, info->min_version.major, info->min_version.minor)
&& (shader_ver_le(&sm1->p.shader_version, info->max_version.major, info->max_version.minor)
|| !info->max_version.major))
return info;
}
}
/* Extract the 2-bit selector for component 'idx' from an SM1 swizzle. */
static unsigned int shader_sm1_get_swizzle_component(uint32_t swizzle, unsigned int idx)
{
    const uint32_t selected = swizzle & VKD3D_SM1_SWIZZLE_COMPONENT_MASK(idx);

    return selected >> VKD3D_SM1_SWIZZLE_COMPONENT_SHIFT(idx);
}
/* Convert an SM1 swizzle (four 2-bit selectors) to the vkd3d representation
 * built by vkd3d_shader_create_swizzle(). */
static uint32_t swizzle_from_sm1(uint32_t swizzle)
{
    const unsigned int x = shader_sm1_get_swizzle_component(swizzle, 0);
    const unsigned int y = shader_sm1_get_swizzle_component(swizzle, 1);
    const unsigned int z = shader_sm1_get_swizzle_component(swizzle, 2);
    const unsigned int w = shader_sm1_get_swizzle_component(swizzle, 3);

    return vkd3d_shader_create_swizzle(x, y, z, w);
}
/* Decode a source parameter token into *src.
 *
 * param:    the raw parameter token.
 * rel_addr: relative-address parameter to attach to the register index, or
 *           NULL for absolute addressing.
 * src:      receives the decoded register, swizzle and source modifier. */
static void shader_sm1_parse_src_param(uint32_t param, const struct vkd3d_shader_src_param *rel_addr,
        struct vkd3d_shader_src_param *src)
{
    /* The register type is stored in two separate bit-fields. */
    const uint32_t type_lo = (param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT;
    const uint32_t type_hi = (param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2;
    const uint32_t sm1_swizzle = (param & VKD3D_SM1_SWIZZLE_MASK) >> VKD3D_SM1_SWIZZLE_SHIFT;
    unsigned int i;

    src->reg.type = type_lo | type_hi;
    src->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
    src->reg.non_uniform = false;
    src->reg.data_type = VKD3D_DATA_FLOAT;

    /* Only the first index is meaningful; the other two are marked unused. */
    src->reg.idx_count = 1;
    src->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
    src->reg.idx[0].rel_addr = rel_addr;
    for (i = 1; i < 3; ++i)
    {
        src->reg.idx[i].offset = ~0u;
        src->reg.idx[i].rel_addr = NULL;
    }

    src->swizzle = swizzle_from_sm1(sm1_swizzle);
    src->modifiers = (param & VKD3D_SM1_SRC_MODIFIER_MASK) >> VKD3D_SM1_SRC_MODIFIER_SHIFT;
}
/* Decode a destination parameter token into *dst.
 *
 * param:    the raw parameter token.
 * rel_addr: relative-address parameter to attach to the register index, or
 *           NULL for absolute addressing.
 * dst:      receives the decoded register, write mask, modifiers and shift. */
static void shader_sm1_parse_dst_param(uint32_t param, const struct vkd3d_shader_src_param *rel_addr,
        struct vkd3d_shader_dst_param *dst)
{
    /* The register type is stored in two separate bit-fields. */
    const uint32_t type_lo = (param & VKD3D_SM1_REGISTER_TYPE_MASK) >> VKD3D_SM1_REGISTER_TYPE_SHIFT;
    const uint32_t type_hi = (param & VKD3D_SM1_REGISTER_TYPE_MASK2) >> VKD3D_SM1_REGISTER_TYPE_SHIFT2;
    unsigned int i;

    dst->reg.type = type_lo | type_hi;
    dst->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
    dst->reg.non_uniform = false;
    dst->reg.data_type = VKD3D_DATA_FLOAT;

    /* Only the first index is meaningful; the other two are marked unused. */
    dst->reg.idx_count = 1;
    dst->reg.idx[0].offset = param & VKD3D_SM1_REGISTER_NUMBER_MASK;
    dst->reg.idx[0].rel_addr = rel_addr;
    for (i = 1; i < 3; ++i)
    {
        dst->reg.idx[i].offset = ~0u;
        dst->reg.idx[i].rel_addr = NULL;
    }

    dst->write_mask = (param & VKD3D_SM1_WRITEMASK_MASK) >> VKD3D_SM1_WRITEMASK_SHIFT;
    dst->modifiers = (param & VKD3D_SM1_DST_MODIFIER_MASK) >> VKD3D_SM1_DST_MODIFIER_SHIFT;
    dst->shift = (param & VKD3D_SM1_DSTSHIFT_MASK) >> VKD3D_SM1_DSTSHIFT_SHIFT;
}
/* Find the signature element with the given semantic name (compared
 * case-insensitively) and semantic index. Returns NULL if there is none. */
static struct signature_element *find_signature_element(const struct shader_signature *signature,
        const char *semantic_name, unsigned int semantic_index)
{
    unsigned int i;

    for (i = 0; i < signature->element_count; ++i)
    {
        struct signature_element *candidate = &signature->elements[i];

        if (candidate->semantic_index != semantic_index)
            continue;
        if (!ascii_strcasecmp(candidate->semantic_name, semantic_name))
            return candidate;
    }

    return NULL;
}
/* Find the first signature element assigned to the given register index.
 * Returns NULL if there is none. */
static struct signature_element *find_signature_element_by_register_index(
        const struct shader_signature *signature, unsigned int register_index)
{
    unsigned int remaining = signature->element_count;
    unsigned int pos = 0;

    while (remaining--)
    {
        struct signature_element *candidate = &signature->elements[pos++];

        if (candidate->register_index == register_index)
            return candidate;
    }

    return NULL;
}
/* Added to the register number to form the signature register index of
 * "COLOR" elements that add_signature_element_from_register() creates for
 * input and attribute-output registers. */
#define SM1_COLOR_REGISTER_OFFSET 8
/* Add an element to the input or output signature, or extend the masks of an
 * existing element with the same semantic name and index.
 *
 * output:         true for the output signature, false for the input one.
 * name, index:    semantic name and index; 'name' is stored by pointer, not
 *                 copied.
 * sysval:         system-value semantic recorded for a new element.
 * register_index: register (and target location) recorded for a new element.
 * is_dcl:         true when called for a declaration; the mask is then
 *                 recorded as declared but not as used.
 * mask:           component mask to add.
 *
 * Returns false only if growing the element array fails. */
static bool add_signature_element(struct vkd3d_shader_sm1_parser *sm1, bool output,
        const char *name, unsigned int index, enum vkd3d_shader_sysval_semantic sysval,
        unsigned int register_index, bool is_dcl, unsigned int mask)
{
    struct shader_signature *signature = output
            ? &sm1->p.shader_desc.output_signature : &sm1->p.shader_desc.input_signature;
    struct signature_element *element = find_signature_element(signature, name, index);

    if (element)
    {
        /* Already known: just widen the masks. */
        element->mask |= mask;
        if (!is_dcl)
            element->used_mask |= mask;
        return true;
    }

    if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity,
            signature->element_count + 1, sizeof(*signature->elements)))
        return false;

    element = &signature->elements[signature->element_count];
    ++signature->element_count;

    element->semantic_name = name;
    element->semantic_index = index;
    element->stream_index = 0;
    element->sysval_semantic = sysval;
    element->component_type = VKD3D_SHADER_COMPONENT_FLOAT;
    element->register_index = register_index;
    element->target_location = register_index;
    element->register_count = 1;
    element->mask = mask;
    if (is_dcl)
        element->used_mask = 0;
    else
        element->used_mask = mask;
    element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE;

    return true;
}
/* Mark components of an already declared signature element as used.
 *
 * The element is looked up by register index in the output or input
 * signature; if none exists, an "undeclared semantic" parser error is
 * reported and nothing else happens. */
static void add_signature_mask(struct vkd3d_shader_sm1_parser *sm1, bool output,
        unsigned int register_index, unsigned int mask)
{
    struct shader_signature *signature = output
            ? &sm1->p.shader_desc.output_signature : &sm1->p.shader_desc.input_signature;
    struct signature_element *element;

    element = find_signature_element_by_register_index(signature, register_index);
    if (element)
    {
        element->used_mask |= mask;
        return;
    }

    vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC,
            "%s register %u was used without being declared.", output ? "Output" : "Input", register_index);
}
static bool add_signature_element_from_register(struct vkd3d_shader_sm1_parser *sm1,
const struct vkd3d_shader_register *reg, bool is_dcl, unsigned int mask)
{
unsigned int register_index = reg->idx[0].offset;
switch (reg->type)
{
case VKD3DSPR_TEMP:
if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL
&& sm1->p.shader_version.major == 1 && !register_index)
return add_signature_element(sm1, true, "COLOR", 0, VKD3D_SHADER_SV_NONE, 0, is_dcl, mask);
return true;
case VKD3DSPR_INPUT:
/* For vertex shaders or sm3 pixel shaders, we should have already
* had a DCL instruction. Otherwise, this is a colour input. */
if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX || sm1->p.shader_version.major == 3)
{
add_signature_mask(sm1, false, register_index, mask);
return true;
}
return add_signature_element(sm1, false, "COLOR", register_index,
VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask);
case VKD3DSPR_TEXTURE:
/* For vertex shaders, this is ADDR. */
if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX)
return true;
return add_signature_element(sm1, false, "TEXCOORD", register_index,
VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask);
case VKD3DSPR_OUTPUT:
if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX)
{
/* For sm < 2 vertex shaders, this is TEXCRDOUT.
*
* For sm3 vertex shaders, this is OUTPUT, but we already
* should have had a DCL instruction. */
if (sm1->p.shader_version.major == 3)
{
add_signature_mask(sm1, true, register_index, mask);
return true;
}
return add_signature_element(sm1, true, "TEXCOORD", register_index,
VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask);
}
/* fall through */
case VKD3DSPR_ATTROUT:
return add_signature_element(sm1, true, "COLOR", register_index,
VKD3D_SHADER_SV_NONE, SM1_COLOR_REGISTER_OFFSET + register_index, is_dcl, mask);
case VKD3DSPR_COLOROUT:
return add_signature_element(sm1, true, "COLOR", register_index,
VKD3D_SHADER_SV_NONE, register_index, is_dcl, mask);
case VKD3DSPR_DEPTHOUT:
return add_signature_element(sm1, true, "DEPTH", 0,
VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1);
case VKD3DSPR_RASTOUT:
switch (register_index)
{
case 0:
return add_signature_element(sm1, true, "POSITION", 0,
VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask);
case 1:
return add_signature_element(sm1, true, "FOG", 0,
VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1);
case 2:
return add_signature_element(sm1, true, "PSIZE", 0,
VKD3D_SHADER_SV_NONE, register_index, is_dcl, 0x1);
default:
vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX,
"Invalid rasterizer output index %u.", register_index);
return true;
}
case VKD3DSPR_MISCTYPE:
switch (register_index)
{
case 0:
return add_signature_element(sm1, false, "VPOS", 0,
VKD3D_SHADER_SV_POSITION, register_index, is_dcl, mask);
case 1:
return add_signature_element(sm1, false, "VFACE", 0,
VKD3D_SHADER_SV_IS_FRONT_FACE, register_index, is_dcl, 0x1);
default:
vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_REGISTER_INDEX,
"Invalid miscellaneous fragment input index %u.", register_index);
return true;
}
default:
return true;
}
}
/* Update the shader signatures for a declaration (DCL) instruction.
 *
 * Output registers go to the output signature; input and texture registers
 * go to the input signature. Any other register type, and every declaration
 * in an sm2 pixel shader, is handled by
 * add_signature_element_from_register() because no usable semantic is
 * provided.
 *
 * The usage field of a declaration token is 4 bits wide, so it can hold
 * values for which no semantic name exists. Such declarations are reported
 * as a parser error and skipped instead of indexing past the name table.
 *
 * Returns false only if adding the signature element fails. */
static bool add_signature_element_from_semantic(struct vkd3d_shader_sm1_parser *sm1,
        const struct vkd3d_shader_semantic *semantic)
{
    const struct vkd3d_shader_register *reg = &semantic->resource.reg.reg;
    enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE;
    unsigned int mask = semantic->resource.reg.write_mask;
    bool output;

    static const char sm1_semantic_names[][13] =
    {
        [VKD3D_DECL_USAGE_POSITION     ] = "POSITION",
        [VKD3D_DECL_USAGE_BLEND_WEIGHT ] = "BLENDWEIGHT",
        [VKD3D_DECL_USAGE_BLEND_INDICES] = "BLENDINDICES",
        [VKD3D_DECL_USAGE_NORMAL       ] = "NORMAL",
        [VKD3D_DECL_USAGE_PSIZE        ] = "PSIZE",
        [VKD3D_DECL_USAGE_TEXCOORD     ] = "TEXCOORD",
        [VKD3D_DECL_USAGE_TANGENT      ] = "TANGENT",
        [VKD3D_DECL_USAGE_BINORMAL     ] = "BINORMAL",
        [VKD3D_DECL_USAGE_TESS_FACTOR  ] = "TESSFACTOR",
        [VKD3D_DECL_USAGE_POSITIONT    ] = "POSITIONT",
        [VKD3D_DECL_USAGE_COLOR        ] = "COLOR",
        [VKD3D_DECL_USAGE_FOG          ] = "FOG",
        [VKD3D_DECL_USAGE_DEPTH        ] = "DEPTH",
        [VKD3D_DECL_USAGE_SAMPLE       ] = "SAMPLE",
    };

    if (reg->type == VKD3DSPR_OUTPUT)
        output = true;
    else if (reg->type == VKD3DSPR_INPUT || reg->type == VKD3DSPR_TEXTURE)
        output = false;
    else /* vpos and vface don't have a semantic. */
        return add_signature_element_from_register(sm1, reg, true, mask);

    /* sm2 pixel shaders use DCL but don't provide a semantic. */
    if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_PIXEL && sm1->p.shader_version.major == 2)
        return add_signature_element_from_register(sm1, reg, true, mask);

    /* Reject usage values that have no entry in the name table; the bytecode
     * is untrusted and the field can encode more values than are defined.
     * NOTE(review): no dedicated "invalid usage" error code is visible in
     * this file, so the closest existing semantic-related code is used. */
    if ((unsigned int)semantic->usage >= ARRAY_SIZE(sm1_semantic_names))
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNDECLARED_SEMANTIC,
                "Invalid declaration usage %#x.", (unsigned int)semantic->usage);
        return true;
    }

    /* With the exception of vertex POSITION output, none of these are system
     * values. Pixel POSITION input is not equivalent to SV_Position; the closer
     * equivalent is VPOS, which is not declared as a semantic. */
    if (sm1->p.shader_version.type == VKD3D_SHADER_TYPE_VERTEX
            && output && semantic->usage == VKD3D_DECL_USAGE_POSITION)
        sysval = VKD3D_SHADER_SV_POSITION;

    return add_signature_element(sm1, output, sm1_semantic_names[semantic->usage],
            semantic->usage_idx, sysval, reg->idx[0].offset, true, mask);
}
/* Record a reference to a constant register.
 *
 * set:      which constant register set the register belongs to.
 * index:    flat register index within that set.
 * from_def: true when the reference comes from a constant definition; the
 *           register is then also marked in the per-set definition bitmap.
 *
 * The "used" count of the set is raised to cover the register. */
static void record_constant_register(struct vkd3d_shader_sm1_parser *sm1,
        enum vkd3d_shader_d3dbc_constant_register set, uint32_t index, bool from_def)
{
    struct vkd3d_shader_desc *desc = &sm1->p.shader_desc;

    desc->flat_constant_count[set].used = max(desc->flat_constant_count[set].used, index + 1);
    if (from_def)
    {
        /* d3d shaders have a maximum of 8192 constants; we should not overrun
         * this array. The word index must be strictly less than the number
         * of words in the bitmap. */
        assert((index / 32) < ARRAY_SIZE(sm1->constant_def_mask[set]));
        bitmap_set(sm1->constant_def_mask[set], index);
    }
}
static void shader_sm1_scan_register(struct vkd3d_shader_sm1_parser *sm1,
const struct vkd3d_shader_register *reg, unsigned int mask, bool from_def)
{
struct vkd3d_shader_desc *desc = &sm1->p.shader_desc;
uint32_t register_index = reg->idx[0].offset;
switch (reg->type)
{
case VKD3DSPR_TEMP:
desc->temp_count = max(desc->temp_count, register_index + 1);
break;
case VKD3DSPR_CONST:
record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, register_index, from_def);
break;
case VKD3DSPR_CONST2:
record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 2048 + register_index, from_def);
break;
case VKD3DSPR_CONST3:
record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 4096 + register_index, from_def);
break;
case VKD3DSPR_CONST4:
record_constant_register(sm1, VKD3D_SHADER_D3DBC_FLOAT_CONSTANT_REGISTER, 6144 + register_index, from_def);
break;
case VKD3DSPR_CONSTINT:
record_constant_register(sm1, VKD3D_SHADER_D3DBC_INT_CONSTANT_REGISTER, register_index, from_def);
break;
case VKD3DSPR_CONSTBOOL:
record_constant_register(sm1, VKD3D_SHADER_D3DBC_BOOL_CONSTANT_REGISTER, register_index, from_def);
break;
default:
break;
}
add_signature_element_from_register(sm1, reg, false, mask);
}
/* Read a parameter token from the input stream, and possibly a relative
 * addressing token.
 *
 * *token receives the parameter token. *addr_token is written only when the
 * parameter token requests relative addressing. If the stream ends before a
 * required token, an "unexpected EOF" error is reported, the parser is
 * flagged to abort and the missing token is returned as 0. */
static void shader_sm1_read_param(struct vkd3d_shader_sm1_parser *sm1,
        const uint32_t **ptr, uint32_t *token, uint32_t *addr_token)
{
    if (*ptr >= sm1->end)
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF,
                "Attempted to read a parameter token, but no more tokens are remaining.");
        sm1->abort = true;
        *token = 0;
        return;
    }

    *token = read_u32(ptr);
    if (has_relative_address(*token))
    {
        /* PS >= 3.0 have relative addressing (with token)
         * VS >= 2.0 have relative addressing (with token)
         * VS >= 1.0 < 2.0 have relative addressing (without token)
         * The version check below should work in general. */
        if (sm1->p.shader_version.major < 2)
        {
            /* No token in the stream: synthesize one that refers to the
             * ADDR register with the default swizzle. */
            const uint32_t type_hi = (VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT2)
                    & VKD3D_SM1_REGISTER_TYPE_MASK2;
            const uint32_t type_lo = (VKD3DSPR_ADDR << VKD3D_SM1_REGISTER_TYPE_SHIFT)
                    & VKD3D_SM1_REGISTER_TYPE_MASK;

            *addr_token = (1u << 31) | type_hi | type_lo
                    | (VKD3D_SM1_SWIZZLE_DEFAULT << VKD3D_SM1_SWIZZLE_SHIFT);
        }
        else if (*ptr < sm1->end)
        {
            *addr_token = read_u32(ptr);
        }
        else
        {
            vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF,
                    "Attempted to read an indirect addressing token, but no more tokens are remaining.");
            sm1->abort = true;
            *addr_token = 0;
        }
    }
}
/* Skip the parameter tokens for an opcode.
 *
 * *ptr is advanced past the parameters of the instruction whose opcode token
 * is 'opcode_token'. No check against the end of the stream is made here. */
static void shader_sm1_skip_opcode(const struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr,
        const struct vkd3d_sm1_opcode_info *opcode_info, uint32_t opcode_token)
{
    /* Version 1.x shaders contain no address tokens, so the parameter count
     * from the opcode table is exact. */
    if (sm1->p.shader_version.major < 2)
    {
        *ptr += (opcode_info->dst_count + opcode_info->src_count);
        return;
    }

    /* Version 2.0+ shaders may contain address tokens, but fortunately they
     * have a useful length field in the opcode token - use it here. */
    *ptr += (opcode_token & VKD3D_SM1_INSTRUCTION_LENGTH_MASK) >> VKD3D_SM1_INSTRUCTION_LENGTH_SHIFT;
}
/* Release a d3dbc parser: its instruction array, its shader description and
 * finally the parser structure itself. */
static void shader_sm1_destroy(struct vkd3d_shader_parser *parser)
{
    struct vkd3d_shader_sm1_parser *sm1;

    shader_instruction_array_destroy(&parser->instructions);

    sm1 = vkd3d_shader_sm1_parser(parser);
    free_shader_desc(&sm1->p.shader_desc);
    vkd3d_free(sm1);
}
/* Read one source parameter from the token stream into *src_param, together
 * with its relative-address parameter if the token requests one.
 *
 * If the relative-address parameter cannot be allocated, an out-of-memory
 * error is reported, the parser is flagged to abort and *src_param is left
 * untouched. */
static void shader_sm1_read_src_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr,
        struct vkd3d_shader_src_param *src_param)
{
    struct vkd3d_shader_src_param *rel_addr;
    uint32_t token, addr_token;

    shader_sm1_read_param(sm1, ptr, &token, &addr_token);

    if (!has_relative_address(token))
    {
        shader_sm1_parse_src_param(token, NULL, src_param);
        return;
    }

    rel_addr = shader_parser_get_src_params(&sm1->p, 1);
    if (!rel_addr)
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY,
                "Out of memory.");
        sm1->abort = true;
        return;
    }

    shader_sm1_parse_src_param(addr_token, NULL, rel_addr);
    shader_sm1_parse_src_param(token, rel_addr, src_param);
}
/* Read one destination parameter from the token stream into *dst_param,
 * together with its relative-address parameter if the token requests one.
 *
 * If the relative-address parameter cannot be allocated, an out-of-memory
 * error is reported, the parser is flagged to abort and *dst_param is left
 * untouched. */
static void shader_sm1_read_dst_param(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr,
        struct vkd3d_shader_dst_param *dst_param)
{
    struct vkd3d_shader_src_param *rel_addr;
    uint32_t token, addr_token;

    shader_sm1_read_param(sm1, ptr, &token, &addr_token);

    if (!has_relative_address(token))
    {
        shader_sm1_parse_dst_param(token, NULL, dst_param);
        return;
    }

    rel_addr = shader_parser_get_src_params(&sm1->p, 1);
    if (!rel_addr)
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY,
                "Out of memory.");
        sm1->abort = true;
        return;
    }

    /* The relative address itself is a source parameter. */
    shader_sm1_parse_src_param(addr_token, NULL, rel_addr);
    shader_sm1_parse_dst_param(token, rel_addr, dst_param);
}
/* Read the two parameter tokens of a declaration (DCL) instruction - the
 * usage token and the destination token - into *semantic and update the
 * shader signatures accordingly.
 *
 * If fewer than two tokens remain, an "unexpected EOF" error is reported,
 * the parser is flagged to abort and *semantic is left untouched. An
 * out-of-range resource type is reported as an error and replaced by
 * VKD3D_SHADER_RESOURCE_NONE. If updating the signature fails, which only
 * happens on allocation failure, an out-of-memory error is reported and the
 * parser is flagged to abort. */
static void shader_sm1_read_semantic(struct vkd3d_shader_sm1_parser *sm1,
        const uint32_t **ptr, struct vkd3d_shader_semantic *semantic)
{
    enum vkd3d_sm1_resource_type resource_type;
    struct vkd3d_shader_register_range *range;
    uint32_t usage_token, dst_token;

    if (*ptr >= sm1->end || sm1->end - *ptr < 2)
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF,
                "Attempted to read a declaration instruction, but not enough tokens are remaining.");
        sm1->abort = true;
        return;
    }

    usage_token = read_u32(ptr);
    dst_token = read_u32(ptr);

    semantic->usage = (usage_token & VKD3D_SM1_DCL_USAGE_MASK) >> VKD3D_SM1_DCL_USAGE_SHIFT;
    semantic->usage_idx = (usage_token & VKD3D_SM1_DCL_USAGE_INDEX_MASK) >> VKD3D_SM1_DCL_USAGE_INDEX_SHIFT;
    resource_type = (usage_token & VKD3D_SM1_RESOURCE_TYPE_MASK) >> VKD3D_SM1_RESOURCE_TYPE_SHIFT;
    if (resource_type >= ARRAY_SIZE(resource_type_table))
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_RESOURCE_TYPE,
                "Invalid resource type %#x.", resource_type);
        semantic->resource_type = VKD3D_SHADER_RESOURCE_NONE;
    }
    else
    {
        semantic->resource_type = resource_type_table[resource_type];
    }
    semantic->resource_data_type[0] = VKD3D_DATA_FLOAT;
    semantic->resource_data_type[1] = VKD3D_DATA_FLOAT;
    semantic->resource_data_type[2] = VKD3D_DATA_FLOAT;
    semantic->resource_data_type[3] = VKD3D_DATA_FLOAT;
    shader_sm1_parse_dst_param(dst_token, NULL, &semantic->resource.reg);

    /* The declaration covers exactly one register, in register space 0. */
    range = &semantic->resource.range;
    range->space = 0;
    range->first = range->last = semantic->resource.reg.reg.idx[0].offset;

    /* A false return means the signature array could not be grown. */
    if (!add_signature_element_from_semantic(sm1, semantic))
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY,
                "Out of memory.");
        sm1->abort = true;
    }
}
/* Read an immediate constant (the payload of DEF/DEFB/DEFI) from *ptr into
 * src_param and advance *ptr past it.
 *
 * A VKD3D_IMMCONST_VEC4 constant occupies four tokens, any other type one
 * token. If not enough tokens remain before sm1->end, an error is reported,
 * sm1->abort is set, and neither src_param nor *ptr is modified. */
static void shader_sm1_read_immconst(struct vkd3d_shader_sm1_parser *sm1, const uint32_t **ptr,
        struct vkd3d_shader_src_param *src_param, enum vkd3d_immconst_type type, enum vkd3d_data_type data_type)
{
    unsigned int count = type == VKD3D_IMMCONST_VEC4 ? 4 : 1;
    unsigned int i;
    size_t remaining;

    /* Clamp to zero so that the count is a well-defined size_t even if the
     * read position is already at or past the end; the "%zu" below must not
     * be handed a (possibly negative) ptrdiff_t. */
    remaining = *ptr < sm1->end ? (size_t)(sm1->end - *ptr) : 0;

    if (remaining < count)
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF,
                "Attempted to read a constant definition, but not enough tokens are remaining. "
                "%zu token(s) available, %u required.", remaining, count);
        sm1->abort = true;
        return;
    }

    src_param->reg.type = VKD3DSPR_IMMCONST;
    src_param->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
    src_param->reg.non_uniform = false;
    src_param->reg.data_type = data_type;
    /* Immediate constants carry no register indices. */
    for (i = 0; i < 3; ++i)
    {
        src_param->reg.idx[i].offset = ~0u;
        src_param->reg.idx[i].rel_addr = NULL;
    }
    src_param->reg.idx_count = 0;
    src_param->reg.immconst_type = type;
    /* The raw token bits are copied; interpretation is left to data_type. */
    memcpy(src_param->reg.u.immconst_uint, *ptr, count * sizeof(uint32_t));
    src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
    src_param->modifiers = 0;

    *ptr += count;
}
/* Consume comment tokens at the current read position (sm1->ptr), tracing
 * their contents.
 *
 * Returns with sm1->ptr pointing past the consumed comments. If a comment
 * claims more payload tokens than remain in the shader, an error is reported
 * and the function returns with sm1->ptr still pointing at that comment
 * token; sm1->abort is not set here. */
static void shader_sm1_read_comment(struct vkd3d_shader_sm1_parser *sm1)
{
const uint32_t **ptr = &sm1->ptr;
const char *comment;
unsigned int size;
size_t remaining;
uint32_t token;
if (*ptr >= sm1->end)
return;
/* Number of tokens left, including the one at *ptr. */
remaining = sm1->end - *ptr;
token = **ptr;
while ((token & VKD3D_SM1_OPCODE_MASK) == VKD3D_SM1_OP_COMMENT)
{
/* Payload length in tokens, taken from the comment token itself. */
size = (token & VKD3D_SM1_COMMENT_SIZE_MASK) >> VKD3D_SM1_COMMENT_SIZE_SHIFT;
/* Account for the comment token before comparing against the payload size. */
if (size > --remaining)
{
vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF,
"Encountered a %u token comment, but only %zu token(s) is/are remaining.",
size, remaining);
return;
}
/* Step over the comment token; "comment" points at the payload. */
comment = (const char *)++(*ptr);
remaining -= size;
*ptr += size;
/* A payload starting with TAG_TEXT is traced line by line, skipping the
 * tag itself. */
if (size > 1 && *(const uint32_t *)comment == TAG_TEXT)
{
const char *end = comment + size * sizeof(token);
const char *p = comment + sizeof(token);
const char *line;
TRACE("// TEXT\n");
for (line = p; line < end; line = p)
{
/* Each traced chunk runs up to and including the next newline, or to
 * the end of the payload if there is none. */
if (!(p = memchr(line, '\n', end - line)))
p = end;
else
++p;
TRACE("// %s\n", debugstr_an(line, p - line));
}
}
else if (size)
{
TRACE("// %s\n", debugstr_an(comment, size * sizeof(token)));
}
else
/* An empty comment ends the scan; the token following it is not examined. */
break;
/* Nothing left to inspect. */
if (!remaining)
break;
token = **ptr;
}
}
/* Post-decode fix-up: BREAKP and IF instructions are not expected to carry
 * instruction flags. If they do, a warning is emitted and the flags are
 * cleared. Other instructions are left untouched. */
static void shader_sm1_validate_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins)
{
    if (!ins->flags)
        return;

    if (ins->handler_idx != VKD3DSIH_BREAKP && ins->handler_idx != VKD3DSIH_IF)
        return;

    vkd3d_shader_parser_warning(&sm1->p, VKD3D_SHADER_WARNING_D3DBC_IGNORED_INSTRUCTION_FLAGS,
            "Ignoring unexpected instruction flags %#x.", ins->flags);
    ins->flags = 0;
}
/* Build a component mask with one bit set for every component selected by
 * any of the four positions of the swizzle. */
static unsigned int mask_from_swizzle(unsigned int swizzle)
{
    unsigned int mask = 0;
    unsigned int position;

    for (position = 0; position < 4; ++position)
        mask |= 1u << vkd3d_swizzle_get_component(swizzle, position);

    return mask;
}
/* Decode the next instruction at sm1->ptr into *ins.
 *
 * Leading comment tokens are consumed first. On success, sm1->ptr is left at
 * the token following the instruction. On any failure (end of byte-code,
 * unknown opcode, allocation failure, instruction running past the end of the
 * shader, or a parameter reader setting sm1->abort), ins->handler_idx is set
 * to VKD3DSIH_INVALID and sm1->ptr is moved to sm1->end so that parsing
 * stops. */
static void shader_sm1_read_instruction(struct vkd3d_shader_sm1_parser *sm1, struct vkd3d_shader_instruction *ins)
{
    struct vkd3d_shader_src_param *src_params, *predicate;
    const struct vkd3d_sm1_opcode_info *opcode_info;
    struct vkd3d_shader_dst_param *dst_param;
    const uint32_t **ptr = &sm1->ptr;
    uint32_t opcode_token;
    const uint32_t *p;
    bool predicated;
    unsigned int i;

    shader_sm1_read_comment(sm1);

    if (*ptr >= sm1->end)
    {
        WARN("End of byte-code, failed to read opcode.\n");
        goto fail;
    }

    ++sm1->p.location.line;
    opcode_token = read_u32(ptr);
    if (!(opcode_info = shader_sm1_get_opcode_info(sm1, opcode_token & VKD3D_SM1_OPCODE_MASK)))
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_INVALID_OPCODE,
                "Invalid opcode %#x (token 0x%08x, shader version %u.%u).",
                opcode_token & VKD3D_SM1_OPCODE_MASK, opcode_token,
                sm1->p.shader_version.major, sm1->p.shader_version.minor);
        goto fail;
    }

    ins->handler_idx = opcode_info->vkd3d_opcode;
    ins->flags = (opcode_token & VKD3D_SM1_INSTRUCTION_FLAGS_MASK) >> VKD3D_SM1_INSTRUCTION_FLAGS_SHIFT;
    ins->coissue = opcode_token & VKD3D_SM1_COISSUE;
    ins->raw = false;
    ins->structured = false;
    predicated = !!(opcode_token & VKD3D_SM1_INSTRUCTION_PREDICATED);
    ins->predicate = predicate = predicated ? shader_parser_get_src_params(&sm1->p, 1) : NULL;
    ins->dst_count = opcode_info->dst_count;
    ins->dst = dst_param = shader_parser_get_dst_params(&sm1->p, ins->dst_count);
    ins->src_count = opcode_info->src_count;
    ins->src = src_params = shader_parser_get_src_params(&sm1->p, ins->src_count);
    /* A NULL result only counts as a failure when parameters were requested. */
    if ((!predicate && predicated) || (!src_params && ins->src_count) || (!dst_param && ins->dst_count))
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory.");
        goto fail;
    }

    ins->resource_type = VKD3D_SHADER_RESOURCE_NONE;
    ins->resource_stride = 0;
    ins->resource_data_type[0] = VKD3D_DATA_FLOAT;
    ins->resource_data_type[1] = VKD3D_DATA_FLOAT;
    ins->resource_data_type[2] = VKD3D_DATA_FLOAT;
    ins->resource_data_type[3] = VKD3D_DATA_FLOAT;
    memset(&ins->texel_offset, 0, sizeof(ins->texel_offset));

    /* Advance the parser position past the whole instruction up front; the
     * parameters themselves are then decoded through the local cursor "p". */
    p = *ptr;
    shader_sm1_skip_opcode(sm1, ptr, opcode_info, opcode_token);
    if (*ptr > sm1->end)
    {
        vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF,
                "The current instruction ends %zu token(s) past the end of the shader.",
                (size_t)(*ptr - sm1->end));
        goto fail;
    }

    /* In every branch below, a register is only scanned while sm1->abort is
     * clear: the parameter readers return without filling in their output
     * once they set the flag, so scanning would inspect a parameter that was
     * never parsed. The instruction is rejected further down in that case. */
    if (ins->handler_idx == VKD3DSIH_DCL)
    {
        shader_sm1_read_semantic(sm1, &p, &ins->declaration.semantic);
    }
    else if (ins->handler_idx == VKD3DSIH_DEF)
    {
        shader_sm1_read_dst_param(sm1, &p, dst_param);
        shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_FLOAT);
        if (!sm1->abort)
            shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true);
    }
    else if (ins->handler_idx == VKD3DSIH_DEFB)
    {
        shader_sm1_read_dst_param(sm1, &p, dst_param);
        shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_SCALAR, VKD3D_DATA_UINT);
        if (!sm1->abort)
            shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true);
    }
    else if (ins->handler_idx == VKD3DSIH_DEFI)
    {
        shader_sm1_read_dst_param(sm1, &p, dst_param);
        shader_sm1_read_immconst(sm1, &p, &src_params[0], VKD3D_IMMCONST_VEC4, VKD3D_DATA_INT);
        if (!sm1->abort)
            shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, true);
    }
    else
    {
        /* Destination token */
        if (ins->dst_count)
        {
            shader_sm1_read_dst_param(sm1, &p, dst_param);
            if (!sm1->abort)
                shader_sm1_scan_register(sm1, &dst_param->reg, dst_param->write_mask, false);
        }

        /* Predication token */
        if (ins->predicate)
            shader_sm1_read_src_param(sm1, &p, predicate);

        /* Other source tokens */
        for (i = 0; i < ins->src_count; ++i)
        {
            shader_sm1_read_src_param(sm1, &p, &src_params[i]);
            if (!sm1->abort)
                shader_sm1_scan_register(sm1, &src_params[i].reg,
                        mask_from_swizzle(src_params[i].swizzle), false);
        }
    }

    if (sm1->abort)
    {
        /* Reset the flag so that it does not leak into a later call. */
        sm1->abort = false;
        goto fail;
    }

    shader_sm1_validate_instruction(sm1, ins);
    return;

fail:
    ins->handler_idx = VKD3DSIH_INVALID;
    *ptr = sm1->end;
}
/* Report whether the parser has reached the end of the shader.
 *
 * Leading comments are consumed first. The end is reached either when the
 * read position is at or past sm1->end, or when the next token is
 * VKD3D_SM1_END, in which case that token is consumed as well. */
static bool shader_sm1_is_end(struct vkd3d_shader_sm1_parser *sm1)
{
    shader_sm1_read_comment(sm1);

    if (sm1->ptr >= sm1->end)
        return true;

    if (*sm1->ptr != VKD3D_SM1_END)
        return false;

    ++sm1->ptr;
    return true;
}
/* Parser operations for SM1 parsers, passed to vkd3d_shader_parser_init() in
 * shader_sm1_init(). Only the destructor is provided. */
const struct vkd3d_shader_parser_ops shader_sm1_parser_ops =
{
.parser_destroy = shader_sm1_destroy,
};
/* Initialise an SM1 parser from the byte-code in compile_info.
 *
 * Validates the size (at least two tokens) and the version token (vertex or
 * pixel shader, version at most 3.0), selects the matching opcode table, sets
 * up the token range [start, end) and initialises the generic parser.
 *
 * Returns VKD3D_OK on success, VKD3D_ERROR_INVALID_SHADER for a malformed
 * header, or VKD3D_ERROR_OUT_OF_MEMORY if the generic parser could not be
 * initialised. */
static enum vkd3d_result shader_sm1_init(struct vkd3d_shader_sm1_parser *sm1,
        const struct vkd3d_shader_compile_info *compile_info, struct vkd3d_shader_message_context *message_context)
{
    const struct vkd3d_shader_location location = {.source_name = compile_info->source_name};
    const uint32_t *code = compile_info->source.code;
    size_t code_size = compile_info->source.size;
    struct vkd3d_shader_desc *shader_desc;
    struct vkd3d_shader_version version;
    unsigned int instruction_estimate;
    uint16_t shader_type;
    size_t token_count;

    token_count = code_size / sizeof(*sm1->start);

    if (token_count < 2)
    {
        vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_D3DBC_UNEXPECTED_EOF,
                "Invalid shader size %zu (token count %zu). At least 2 tokens are required.",
                code_size, token_count);
        return VKD3D_ERROR_INVALID_SHADER;
    }

    TRACE("Version: 0x%08x.\n", code[0]);

    /* The upper 16 bits of the version token identify the shader type. */
    shader_type = code[0] >> 16;
    version.major = VKD3D_SM1_VERSION_MAJOR(code[0]);
    version.minor = VKD3D_SM1_VERSION_MINOR(code[0]);

    if (shader_type == VKD3D_SM1_VS)
    {
        version.type = VKD3D_SHADER_TYPE_VERTEX;
        sm1->opcode_table = vs_opcode_table;
    }
    else if (shader_type == VKD3D_SM1_PS)
    {
        version.type = VKD3D_SHADER_TYPE_PIXEL;
        sm1->opcode_table = ps_opcode_table;
    }
    else
    {
        vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN,
                "Invalid shader type %#x (token 0x%08x).", shader_type, code[0]);
        return VKD3D_ERROR_INVALID_SHADER;
    }

    if (!shader_ver_le(&version, 3, 0))
    {
        vkd3d_shader_error(message_context, &location, VKD3D_SHADER_ERROR_D3DBC_INVALID_VERSION_TOKEN,
                "Invalid shader version %u.%u (token 0x%08x).", version.major, version.minor, code[0]);
        return VKD3D_ERROR_INVALID_SHADER;
    }

    /* Instructions start right after the version token. */
    sm1->start = &code[1];
    sm1->end = &code[token_count];

    /* Estimate instruction count to avoid reallocation in most shaders. */
    if (code_size != ~(size_t)0)
        instruction_estimate = token_count / 4u + 4;
    else
        instruction_estimate = 16;

    if (!vkd3d_shader_parser_init(&sm1->p, message_context, compile_info->source_name, &version,
            &shader_sm1_parser_ops, instruction_estimate))
        return VKD3D_ERROR_OUT_OF_MEMORY;

    shader_desc = &sm1->p.shader_desc;
    shader_desc->byte_code = code;
    shader_desc->byte_code_size = code_size;

    sm1->ptr = sm1->start;

    return VKD3D_OK;
}
/* Return one more than the highest used constant register index in "set"
 * that is not marked in the DEF mask, or 0 if every used register is. */
static uint32_t get_external_constant_count(struct vkd3d_shader_sm1_parser *sm1,
        enum vkd3d_shader_d3dbc_constant_register set)
{
    unsigned int count = sm1->p.shader_desc.flat_constant_count[set].used;

    /* Find the highest constant index which is not written by a DEF
     * instruction. We can't (easily) use an FFZ function for this since it
     * needs to be limited by the highest used register index. */
    while (count)
    {
        if (!bitmap_is_set(sm1->constant_def_mask[set], count - 1))
            return count;
        --count;
    }

    return 0;
}
/* Create an SM1 parser for the byte-code in compile_info and parse all of its
 * instructions.
 *
 * On allocation, initialisation or per-instruction failure the parser is
 * freed, *parser is not written and a negative error code is returned.
 * Otherwise *parser receives the new parser and the external constant counts
 * are computed; the result is VKD3D_ERROR_INVALID_SHADER if the parser
 * recorded a failure while parsing, and VKD3D_OK otherwise.
 *
 * NOTE(review): in the "recorded a failure" case the parser is still handed
 * out through *parser and is not destroyed here, unlike the earlier error
 * paths - confirm that callers release it when a negative value is
 * returned. */
int vkd3d_shader_sm1_parser_create(const struct vkd3d_shader_compile_info *compile_info,
        struct vkd3d_shader_message_context *message_context, struct vkd3d_shader_parser **parser)
{
    struct vkd3d_shader_instruction_array *instructions;
    struct vkd3d_shader_instruction *ins;
    struct vkd3d_shader_sm1_parser *sm1;
    unsigned int set;
    int ret;

    sm1 = vkd3d_calloc(1, sizeof(*sm1));
    if (!sm1)
    {
        ERR("Failed to allocate parser.\n");
        return VKD3D_ERROR_OUT_OF_MEMORY;
    }

    ret = shader_sm1_init(sm1, compile_info, message_context);
    if (ret < 0)
    {
        WARN("Failed to initialise shader parser, ret %d.\n", ret);
        vkd3d_free(sm1);
        return ret;
    }

    instructions = &sm1->p.instructions;
    for (;;)
    {
        if (shader_sm1_is_end(sm1))
            break;

        /* Make room for one more instruction before decoding into it. */
        if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
        {
            ERR("Failed to allocate instructions.\n");
            vkd3d_shader_parser_error(&sm1->p, VKD3D_SHADER_ERROR_D3DBC_OUT_OF_MEMORY, "Out of memory.");
            shader_sm1_destroy(&sm1->p);
            return VKD3D_ERROR_OUT_OF_MEMORY;
        }

        ins = &instructions->elements[instructions->count];
        shader_sm1_read_instruction(sm1, ins);

        if (ins->handler_idx == VKD3DSIH_INVALID)
        {
            WARN("Encountered unrecognized or invalid instruction.\n");
            shader_sm1_destroy(&sm1->p);
            return VKD3D_ERROR_INVALID_SHADER;
        }

        ++instructions->count;
    }

    *parser = &sm1->p;

    for (set = 0; set < ARRAY_SIZE(sm1->p.shader_desc.flat_constant_count); ++set)
        sm1->p.shader_desc.flat_constant_count[set].external = get_external_constant_count(sm1, set);

    if (sm1->p.failed)
        return VKD3D_ERROR_INVALID_SHADER;

    return VKD3D_OK;
}
/* Look up the dedicated SM1 register for a semantic, if there is one for the
 * current profile's shader type and major version.
 *
 * On a match, *type receives the register type and *reg the register number:
 * the fixed table offset for D3DSPR_MISCTYPE and D3DSPR_RASTOUT registers,
 * the semantic index otherwise. Returns false, leaving the outputs untouched,
 * when the table has no matching entry. Semantic names are compared
 * case-insensitively. */
bool hlsl_sm1_register_from_semantic(struct hlsl_ctx *ctx, const struct hlsl_semantic *semantic,
        bool output, D3DSHADER_PARAM_REGISTER_TYPE *type, unsigned int *reg)
{
    unsigned int i;

    static const struct
    {
        const char *semantic;
        bool output;
        enum vkd3d_shader_type shader_type;
        unsigned int major_version;
        D3DSHADER_PARAM_REGISTER_TYPE type;
        DWORD offset;
    }
    register_table[] =
    {
        {"color",       false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_INPUT},
        {"texcoord",    false, VKD3D_SHADER_TYPE_PIXEL, 1, D3DSPR_TEXTURE},

        {"color",       true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT},
        {"depth",       true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT},
        {"sv_depth",    true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_DEPTHOUT},
        {"sv_target",   true,  VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_COLOROUT},
        {"color",       false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_INPUT},
        {"texcoord",    false, VKD3D_SHADER_TYPE_PIXEL, 2, D3DSPR_TEXTURE},

        {"color",       true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT},
        {"depth",       true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT},
        {"sv_depth",    true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_DEPTHOUT},
        {"sv_target",   true,  VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_COLOROUT},
        {"sv_position", false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION},
        {"vface",       false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_FACE},
        {"vpos",        false, VKD3D_SHADER_TYPE_PIXEL, 3, D3DSPR_MISCTYPE, D3DSMO_POSITION},

        {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_ATTROUT},
        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_FOG},
        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION},
        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE},
        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_RASTOUT, D3DSRO_POSITION},
        {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 1, D3DSPR_TEXCRDOUT},

        {"color",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_ATTROUT},
        {"fog",         true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_FOG},
        {"position",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION},
        {"psize",       true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POINT_SIZE},
        {"sv_position", true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_RASTOUT, D3DSRO_POSITION},
        {"texcoord",    true,  VKD3D_SHADER_TYPE_VERTEX, 2, D3DSPR_TEXCRDOUT},
    };

    for (i = 0; i < ARRAY_SIZE(register_table); ++i)
    {
        if (ascii_strcasecmp(semantic->name, register_table[i].semantic))
            continue;
        if (output != register_table[i].output)
            continue;
        if (ctx->profile->type != register_table[i].shader_type)
            continue;
        if (ctx->profile->major_version != register_table[i].major_version)
            continue;

        *type = register_table[i].type;
        switch (register_table[i].type)
        {
            case D3DSPR_MISCTYPE:
            case D3DSPR_RASTOUT:
                /* These register files are addressed by a fixed offset. */
                *reg = register_table[i].offset;
                break;

            default:
                *reg = semantic->index;
                break;
        }
        return true;
    }

    return false;
}
/* Translate a semantic into a D3DDECLUSAGE value and usage index.
 *
 * The semantic name is matched case-insensitively against a fixed table. On a
 * match, *usage receives the table entry's usage, *usage_idx the semantic's
 * index, and true is returned. Otherwise false is returned and the outputs
 * are left untouched. */
bool hlsl_sm1_usage_from_semantic(const struct hlsl_semantic *semantic, D3DDECLUSAGE *usage, uint32_t *usage_idx)
{
    static const struct
    {
        const char *name;
        D3DDECLUSAGE usage;
    }
    semantics[] =
    {
        {"binormal",        D3DDECLUSAGE_BINORMAL},
        {"blendindices",    D3DDECLUSAGE_BLENDINDICES},
        {"blendweight",     D3DDECLUSAGE_BLENDWEIGHT},
        {"color",           D3DDECLUSAGE_COLOR},
        {"depth",           D3DDECLUSAGE_DEPTH},
        {"fog",             D3DDECLUSAGE_FOG},
        {"normal",          D3DDECLUSAGE_NORMAL},
        {"position",        D3DDECLUSAGE_POSITION},
        {"positiont",       D3DDECLUSAGE_POSITIONT},
        {"psize",           D3DDECLUSAGE_PSIZE},
        {"sample",          D3DDECLUSAGE_SAMPLE},
        {"sv_depth",        D3DDECLUSAGE_DEPTH},
        {"sv_position",     D3DDECLUSAGE_POSITION},
        {"sv_target",       D3DDECLUSAGE_COLOR},
        {"tangent",         D3DDECLUSAGE_TANGENT},
        {"tessfactor",      D3DDECLUSAGE_TESSFACTOR},
        {"texcoord",        D3DDECLUSAGE_TEXCOORD},
    };
    unsigned int entry;

    for (entry = 0; entry < ARRAY_SIZE(semantics); ++entry)
    {
        if (ascii_strcasecmp(semantic->name, semantics[entry].name))
            continue;

        *usage = semantics[entry].usage;
        *usage_idx = semantic->index;
        return true;
    }

    return false;
}
/* Build the SM1 version token: a vertex shader version token for vertex
 * shaders, a pixel shader version token for every other shader type. */
static uint32_t sm1_version(enum vkd3d_shader_type type, unsigned int major, unsigned int minor)
{
    return type == VKD3D_SHADER_TYPE_VERTEX ? D3DVS_VERSION(major, minor) : D3DPS_VERSION(major, minor);
}
/* Map an HLSL type to its D3DXPARAMETER_CLASS. Arrays are classified by their
 * innermost element type; matrices by their majority modifier. */
static D3DXPARAMETER_CLASS sm1_class(const struct hlsl_type *type)
{
    /* Strip every array level to reach the element type. */
    while (type->class == HLSL_CLASS_ARRAY)
        type = type->e.array.type;

    switch (type->class)
    {
        case HLSL_CLASS_MATRIX:
            assert(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
            return (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
                    ? D3DXPC_MATRIX_COLUMNS : D3DXPC_MATRIX_ROWS;

        case HLSL_CLASS_OBJECT:
            return D3DXPC_OBJECT;

        case HLSL_CLASS_SCALAR:
            return D3DXPC_SCALAR;

        case HLSL_CLASS_STRUCT:
            return D3DXPC_STRUCT;

        case HLSL_CLASS_VECTOR:
            return D3DXPC_VECTOR;

        default:
            ERR("Invalid class %#x.\n", type->class);
            vkd3d_unreachable();
    }
}
static D3DXPARAMETER_TYPE sm1_base_type(const struct hlsl_type *type)
{
switch (type->base_type)
{
case HLSL_TYPE_BOOL:
return D3DXPT_BOOL;
case HLSL_TYPE_FLOAT:
case HLSL_TYPE_HALF:
return D3DXPT_FLOAT;
case HLSL_TYPE_INT:
case HLSL_TYPE_UINT:
return D3DXPT_INT;
case HLSL_TYPE_PIXELSHADER:
return D3DXPT_PIXELSHADER;
case HLSL_TYPE_SAMPLER:
switch (type->sampler_dim)
{
case HLSL_SAMPLER_DIM_1D:
return D3DXPT_SAMPLER1D;
case HLSL_SAMPLER_DIM_2D:
return D3DXPT_SAMPLER2D;
case HLSL_SAMPLER_DIM_3D:
return D3DXPT_SAMPLER3D;
case HLSL_SAMPLER_DIM_CUBE:
return D3DXPT_SAMPLERCUBE;
case HLSL_SAMPLER_DIM_GENERIC:
return D3DXPT_SAMPLER;
default:
ERR("Invalid dimension %#x.\n", type->sampler_dim);
vkd3d_unreachable();
}
break;
case HLSL_TYPE_STRING:
return D3DXPT_STRING;
case HLSL_TYPE_TEXTURE:
switch (type->sampler_dim)
{
case HLSL_SAMPLER_DIM_1D:
return D3DXPT_TEXTURE1D;
case HLSL_SAMPLER_DIM_2D:
return D3DXPT_TEXTURE2D;
case HLSL_SAMPLER_DIM_3D:
return D3DXPT_TEXTURE3D;
case HLSL_SAMPLER_DIM_CUBE:
return D3DXPT_TEXTURECUBE;
case HLSL_SAMPLER_DIM_GENERIC:
return D3DXPT_TEXTURE;
default:
ERR("Invalid dimension %#x.\n", type->sampler_dim);
vkd3d_unreachable();
}
break;
case HLSL_TYPE_VERTEXSHADER:
return D3DXPT_VERTEXSHADER;
case HLSL_TYPE_VOID:
return D3DXPT_VOID;
default:
vkd3d_unreachable();
}
}
/* Emit the constant-table type description for "type" into the buffer and
 * record its position in type->bytecode_offset. A type whose offset is
 * already non-zero is not written again.
 *
 * For (arrays of) structs, the field names and field types are written first,
 * followed by a table of (name offset, type offset) pairs. All offsets stored
 * in the buffer are relative to ctab_start. */
static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type, unsigned int ctab_start)
{
    const struct hlsl_type *element_type = hlsl_get_multiarray_element_type(type);
    unsigned int element_count = hlsl_get_multiarray_size(type);
    unsigned int member_count = 0;
    size_t members_offset = 0;
    struct hlsl_struct_field *member;
    size_t idx;

    if (type->bytecode_offset)
        return;

    if (element_type->class == HLSL_CLASS_STRUCT)
    {
        member_count = element_type->e.record.field_count;

        /* First pass: emit every member's name and type description. */
        for (idx = 0; idx < member_count; ++idx)
        {
            member = &element_type->e.record.fields[idx];
            member->name_bytecode_offset = put_string(buffer, member->name);
            write_sm1_type(buffer, member->type, ctab_start);
        }

        members_offset = bytecode_align(buffer) - ctab_start;

        /* Second pass: emit the member table referring to the data above. */
        for (idx = 0; idx < member_count; ++idx)
        {
            member = &element_type->e.record.fields[idx];
            put_u32(buffer, member->name_bytecode_offset - ctab_start);
            put_u32(buffer, member->type->bytecode_offset - ctab_start);
        }
    }

    type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm1_class(type), sm1_base_type(element_type)));
    put_u32(buffer, vkd3d_make_u32(type->dimy, type->dimx));
    put_u32(buffer, vkd3d_make_u32(element_count, member_count));
    put_u32(buffer, members_offset);
}
/* Unlink to_sort from the list it is currently on and insert it into
 * "sorted", in front of the first entry whose name compares greater (by
 * strcmp()), or at the tail if there is no such entry. */
static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort)
{
    struct hlsl_ir_var *candidate;

    list_remove(&to_sort->extern_entry);

    LIST_FOR_EACH_ENTRY(candidate, sorted, struct hlsl_ir_var, extern_entry)
    {
        if (strcmp(to_sort->name, candidate->name) >= 0)
            continue;

        list_add_before(&candidate->extern_entry, &to_sort->extern_entry);
        return;
    }

    list_add_tail(sorted, &to_sort->extern_entry);
}
/* Move every uniform variable from ctx->extern_vars onto a name-sorted list,
 * then append that list to the tail of ctx->extern_vars. */
static void sm1_sort_externs(struct hlsl_ctx *ctx)
{
    struct list sorted = LIST_INIT(sorted);
    struct hlsl_ir_var *cur, *next;

    LIST_FOR_EACH_ENTRY_SAFE(cur, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        if (!cur->is_uniform)
            continue;

        sm1_sort_extern(&sorted, cur);
    }

    list_move_tail(&ctx->extern_vars, &sorted);
}
/* Write the constant table (CTAB) comment block describing the shader's
 * uniforms.
 *
 * One table entry is emitted per allocated register set of every extern
 * variable without a semantic. Uniform function parameters are renamed to
 * "$name" first, and the extern list is then sorted by name. Entries are
 * written in two passes: first the fixed-size records with placeholder name
 * and type offsets, then the strings and type descriptions, patching the
 * records as they are produced. All stored offsets are relative to the start
 * of the constant table.
 *
 * On allocation failure while renaming, buffer->status is set to
 * VKD3D_ERROR_OUT_OF_MEMORY and nothing is written. */
static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        struct hlsl_ir_function_decl *entry_func)
{
    size_t ctab_offset, ctab_start, ctab_end, vars_start, size_offset, creator_offset, offset;
    unsigned int uniform_count = 0;
    struct hlsl_ir_var *var;

    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        bool has_entry = false;
        unsigned int r;

        for (r = 0; r <= HLSL_REGSET_LAST; ++r)
        {
            if (var->semantic.name || !var->regs[r].allocated)
                continue;

            ++uniform_count;
            has_entry = true;
        }

        /* Rename once per variable, not once per register set: a variable
         * allocated in more than one register set must still end up with a
         * single "$" prefix. */
        if (has_entry && var->is_param && var->is_uniform)
        {
            struct vkd3d_string_buffer *name;
            const char *new_name;

            if (!(name = hlsl_get_string_buffer(ctx)))
            {
                buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
                return;
            }
            vkd3d_string_buffer_printf(name, "$%s", var->name);
            new_name = hlsl_strdup(ctx, name->buffer);
            hlsl_release_string_buffer(ctx, name);

            /* Only replace the old name once the copy is known to exist, so
             * that var->name never becomes NULL. */
            if (!new_name)
            {
                buffer->status = VKD3D_ERROR_OUT_OF_MEMORY;
                return;
            }
            vkd3d_free((char *)var->name);
            var->name = new_name;
        }
    }

    sm1_sort_externs(ctx);

    /* Placeholder for the comment token; patched once the size is known. */
    size_offset = put_u32(buffer, 0);
    ctab_offset = put_u32(buffer, VKD3D_MAKE_TAG('C','T','A','B'));

    ctab_start = put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE));
    creator_offset = put_u32(buffer, 0);
    put_u32(buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));
    put_u32(buffer, uniform_count);
    put_u32(buffer, sizeof(D3DXSHADER_CONSTANTTABLE)); /* offset of constants */
    put_u32(buffer, 0); /* FIXME: flags */
    put_u32(buffer, 0); /* FIXME: target string */

    vars_start = bytecode_align(buffer);

    /* First pass: fixed-size records, five 32-bit values each. */
    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        unsigned int r;

        for (r = 0; r <= HLSL_REGSET_LAST; ++r)
        {
            if (var->semantic.name || !var->regs[r].allocated)
                continue;

            put_u32(buffer, 0); /* name */
            if (r == HLSL_REGSET_NUMERIC)
            {
                put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id));
                put_u32(buffer, var->data_type->reg_size[r] / 4);
            }
            else
            {
                put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].id));
                put_u32(buffer, var->bind_count[r]);
            }
            put_u32(buffer, 0); /* type */
            put_u32(buffer, 0); /* FIXME: default value */
        }
    }

    /* Second pass: names and types, in the same order as the records. */
    uniform_count = 0;

    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        unsigned int r;

        for (r = 0; r <= HLSL_REGSET_LAST; ++r)
        {
            size_t var_offset, name_offset;

            if (var->semantic.name || !var->regs[r].allocated)
                continue;

            var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t));

            name_offset = put_string(buffer, var->name);
            set_u32(buffer, var_offset, name_offset - ctab_start);

            write_sm1_type(buffer, var->data_type, ctab_start);
            set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start);
            ++uniform_count;
        }
    }

    offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
    set_u32(buffer, creator_offset, offset - ctab_start);

    /* Patch the comment token with the final length, in 32-bit tokens. */
    ctab_end = bytecode_align(buffer);
    set_u32(buffer, size_offset, vkd3d_make_u32(D3DSIO_COMMENT, (ctab_end - ctab_offset) / sizeof(uint32_t)));
}
/* Encode a register type into parameter-token bits. The value is shifted into
 * two separate bit fields (D3DSP_REGTYPE_MASK and D3DSP_REGTYPE_MASK2), which
 * are then combined. */
static uint32_t sm1_encode_register_type(D3DSHADER_PARAM_REGISTER_TYPE type)
{
    uint32_t first_field = (type << D3DSP_REGTYPE_SHIFT) & D3DSP_REGTYPE_MASK;
    uint32_t second_field = (type << D3DSP_REGTYPE_SHIFT2) & D3DSP_REGTYPE_MASK2;

    return first_field | second_field;
}
/* In-memory description of a single SM1 instruction to be emitted by
 * write_sm1_instruction(): an opcode, an optional destination register and up
 * to three source registers. */
struct sm1_instruction
{
D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode;
/* Destination register; only written when has_dst is non-zero. */
struct sm1_dst_register
{
D3DSHADER_PARAM_REGISTER_TYPE type;
D3DSHADER_PARAM_DSTMOD_TYPE mod;
/* Must be non-zero when the register is written (asserted by
 * write_sm1_dst_register()). */
unsigned int writemask;
uint32_t reg;
} dst;
/* Source registers; only the first src_count entries are written. */
struct sm1_src_register
{
D3DSHADER_PARAM_REGISTER_TYPE type;
D3DSHADER_PARAM_SRCMOD_TYPE mod;
unsigned int swizzle;
uint32_t reg;
} srcs[3];
unsigned int src_count;
/* Non-zero if "dst" is valid; also added to the encoded instruction length. */
unsigned int has_dst;
};
/* Emit one destination parameter token: bit 31, the encoded register type,
 * the destination modifier, the write mask at bit 16 and the register
 * number. The write mask must be non-zero. */
static void write_sm1_dst_register(struct vkd3d_bytecode_buffer *buffer, const struct sm1_dst_register *reg)
{
    uint32_t token = 1u << 31;

    assert(reg->writemask);

    token |= sm1_encode_register_type(reg->type);
    token |= reg->mod;
    token |= reg->writemask << 16;
    token |= reg->reg;

    put_u32(buffer, token);
}
/* Emit one source parameter token: bit 31, the encoded register type, the
 * source modifier, the swizzle at bit 16 and the register number. */
static void write_sm1_src_register(struct vkd3d_bytecode_buffer *buffer,
        const struct sm1_src_register *reg)
{
    uint32_t token = 1u << 31;

    token |= sm1_encode_register_type(reg->type);
    token |= reg->mod;
    token |= reg->swizzle << 16;
    token |= reg->reg;

    put_u32(buffer, token);
}
/* Emit a complete instruction: the opcode token, followed by the destination
 * parameter (if instr->has_dst) and instr->src_count source parameters.
 *
 * For profiles with a major version above 1, the number of parameter tokens
 * is additionally encoded in the opcode token's length field. */
static void write_sm1_instruction(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct sm1_instruction *instr)
{
    uint32_t token = instr->opcode;
    unsigned int i;

    if (ctx->profile->major_version > 1)
        token |= (instr->has_dst + instr->src_count) << D3DSI_INSTLENGTH_SHIFT;
    put_u32(buffer, token);

    if (instr->has_dst)
        write_sm1_dst_register(buffer, &instr->dst);

    for (i = 0; i < instr->src_count; ++i)
        write_sm1_src_register(buffer, &instr->srcs[i]);
}
/* Replace the source register's swizzle with the result of mapping it through
 * the given write mask via hlsl_map_swizzle(). */
static void sm1_map_src_swizzle(struct sm1_src_register *src, unsigned int map_writemask)
{
    unsigned int mapped = hlsl_map_swizzle(src->swizzle, map_writemask);

    src->swizzle = mapped;
}
/* Emit a DP2ADD instruction operating on temporary registers. The source
 * swizzles are derived from the source write masks and are not remapped to
 * the destination write mask. */
static void write_sm1_dp2add(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_reg *dst, const struct hlsl_reg *src1, const struct hlsl_reg *src2,
        const struct hlsl_reg *src3)
{
    const struct hlsl_reg *sources[3] = {src1, src2, src3};
    struct sm1_instruction instr = {0};
    unsigned int i;

    instr.opcode = D3DSIO_DP2ADD;

    instr.dst.type = D3DSPR_TEMP;
    instr.dst.writemask = dst->writemask;
    instr.dst.reg = dst->id;
    instr.has_dst = 1;

    for (i = 0; i < 3; ++i)
    {
        instr.srcs[i].type = D3DSPR_TEMP;
        instr.srcs[i].swizzle = hlsl_swizzle_from_writemask(sources[i]->writemask);
        instr.srcs[i].reg = sources[i]->id;
    }
    instr.src_count = 3;

    write_sm1_instruction(ctx, buffer, &instr);
}
/* Emit a two-source instruction on temporary registers. Both source swizzles
 * are derived from the source write masks and then mapped through the
 * destination write mask. */
static void write_sm1_binary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
        const struct hlsl_reg *src1, const struct hlsl_reg *src2)
{
    const struct hlsl_reg *sources[2] = {src1, src2};
    struct sm1_instruction instr = {0};
    unsigned int i;

    instr.opcode = opcode;

    instr.dst.type = D3DSPR_TEMP;
    instr.dst.writemask = dst->writemask;
    instr.dst.reg = dst->id;
    instr.has_dst = 1;

    for (i = 0; i < 2; ++i)
    {
        instr.srcs[i].type = D3DSPR_TEMP;
        instr.srcs[i].swizzle = hlsl_swizzle_from_writemask(sources[i]->writemask);
        instr.srcs[i].reg = sources[i]->id;
    }
    instr.src_count = 2;

    for (i = 0; i < 2; ++i)
        sm1_map_src_swizzle(&instr.srcs[i], instr.dst.writemask);

    write_sm1_instruction(ctx, buffer, &instr);
}
/* Emit a two-source instruction on temporary registers without mapping the
 * source swizzles through the destination write mask; the swizzles come
 * straight from the source write masks. */
static void write_sm1_binary_op_dot(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
        const struct hlsl_reg *src1, const struct hlsl_reg *src2)
{
    const struct hlsl_reg *sources[2] = {src1, src2};
    struct sm1_instruction instr = {0};
    unsigned int i;

    instr.opcode = opcode;

    instr.dst.type = D3DSPR_TEMP;
    instr.dst.writemask = dst->writemask;
    instr.dst.reg = dst->id;
    instr.has_dst = 1;

    for (i = 0; i < 2; ++i)
    {
        instr.srcs[i].type = D3DSPR_TEMP;
        instr.srcs[i].swizzle = hlsl_swizzle_from_writemask(sources[i]->writemask);
        instr.srcs[i].reg = sources[i]->id;
    }
    instr.src_count = 2;

    write_sm1_instruction(ctx, buffer, &instr);
}
/* Emit a single-source instruction on temporary registers, with the given
 * source and destination modifiers. The source swizzle is derived from the
 * source write mask and mapped through the destination write mask. */
static void write_sm1_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode, const struct hlsl_reg *dst,
        const struct hlsl_reg *src, D3DSHADER_PARAM_SRCMOD_TYPE src_mod, D3DSHADER_PARAM_DSTMOD_TYPE dst_mod)
{
    struct sm1_instruction instr = {0};

    instr.opcode = opcode;

    instr.dst.type = D3DSPR_TEMP;
    instr.dst.mod = dst_mod;
    instr.dst.writemask = dst->writemask;
    instr.dst.reg = dst->id;
    instr.has_dst = 1;

    instr.srcs[0].type = D3DSPR_TEMP;
    instr.srcs[0].mod = src_mod;
    instr.srcs[0].swizzle = hlsl_swizzle_from_writemask(src->writemask);
    instr.srcs[0].reg = src->id;
    instr.src_count = 1;

    sm1_map_src_swizzle(&instr.srcs[0], instr.dst.writemask);
    write_sm1_instruction(ctx, buffer, &instr);
}
static void write_sm1_constant_defs(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
{
unsigned int i, x;
for (i = 0; i < ctx->constant_defs.count; ++i)
{
const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i];
uint32_t token = D3DSIO_DEF;
const struct sm1_dst_register reg =
{
.type = D3DSPR_CONST,
.writemask = VKD3DSP_WRITEMASK_ALL,
.reg = constant_reg->index,
};
if (ctx->profile->major_version > 1)
token |= 5 << D3DSI_INSTLENGTH_SHIFT;
put_u32(buffer, token);
write_sm1_dst_register(buffer, &reg);
for (x = 0; x < 4; ++x)
put_f32(buffer, constant_reg->value.f[x]);
}
}
/* Emit a DCL instruction for an input or output semantic variable.
 *
 * If the semantic maps to a dedicated register for the current profile, that
 * register is declared with usage 0 / usage index 0. Otherwise the usage is
 * looked up from the semantic (which is asserted to succeed) and the
 * variable's allocated numeric register is declared as a generic input or
 * output. The write mask covers the first dimx components of the variable's
 * type. */
static void write_sm1_semantic_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_ir_var *var, bool output)
{
    struct sm1_dst_register reg = {0};
    uint32_t opcode_token, usage_token, usage_idx;
    D3DDECLUSAGE usage;
    bool ret;

    if (!hlsl_sm1_register_from_semantic(ctx, &var->semantic, output, &reg.type, &reg.reg))
    {
        ret = hlsl_sm1_usage_from_semantic(&var->semantic, &usage, &usage_idx);
        assert(ret);
        reg.type = output ? D3DSPR_OUTPUT : D3DSPR_INPUT;
        reg.reg = var->regs[HLSL_REGSET_NUMERIC].id;
    }
    else
    {
        usage = 0;
        usage_idx = 0;
    }

    opcode_token = D3DSIO_DCL;
    if (ctx->profile->major_version > 1)
        opcode_token |= 2 << D3DSI_INSTLENGTH_SHIFT;
    put_u32(buffer, opcode_token);

    usage_token = (1u << 31);
    usage_token |= usage << D3DSP_DCL_USAGE_SHIFT;
    usage_token |= usage_idx << D3DSP_DCL_USAGEINDEX_SHIFT;
    put_u32(buffer, usage_token);

    reg.writemask = (1 << var->data_type->dimx) - 1;
    write_sm1_dst_register(buffer, &reg);
}
/* Emit DCL instructions for semantic variables, depending on the profile:
 * pixel shaders from version 2 on declare their inputs; vertex shaders up to
 * version 3 declare their inputs, and version 3 vertex shaders also declare
 * their outputs. Nothing is declared in any other case. */
static void write_sm1_semantic_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
{
    bool declare_inputs = false, declare_outputs = false;
    struct hlsl_ir_var *var;

    if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
    {
        declare_inputs = ctx->profile->major_version >= 2;
    }
    else if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
    {
        declare_inputs = ctx->profile->major_version <= 3;
        declare_outputs = ctx->profile->major_version == 3;
    }

    LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
    {
        if (declare_inputs && var->is_input_semantic)
            write_sm1_semantic_dcl(ctx, buffer, var, false);
        if (declare_outputs && var->is_output_semantic)
            write_sm1_semantic_dcl(ctx, buffer, var, true);
    }
}
static void write_sm1_sampler_dcl(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
unsigned int reg_id, enum hlsl_sampler_dim sampler_dim)
{
struct sm1_dst_register reg = {0};
uint32_t token, res_type = 0;
token = D3DSIO_DCL;
if (ctx->profile->major_version > 1)
token |= 2 << D3DSI_INSTLENGTH_SHIFT;
put_u32(buffer, token);
switch (sampler_dim)
{
case HLSL_SAMPLER_DIM_1D:
res_type = VKD3D_SM1_RESOURCE_TEXTURE_1D;
break;
case HLSL_SAMPLER_DIM_2D:
res_type = VKD3D_SM1_RESOURCE_TEXTURE_2D;
break;
case HLSL_SAMPLER_DIM_CUBE:
res_type = VKD3D_SM1_RESOURCE_TEXTURE_CUBE;
break;
case HLSL_SAMPLER_DIM_3D:
res_type = VKD3D_SM1_RESOURCE_TEXTURE_3D;
break;
default:
vkd3d_unreachable();
break;
}
token = (1u << 31);
token |= res_type << VKD3D_SM1_RESOURCE_TYPE_SHIFT;
put_u32(buffer, token);
reg.type = D3DSPR_SAMPLER;
reg.writemask = VKD3DSP_WRITEMASK_ALL;
reg.reg = reg_id;
write_sm1_dst_register(buffer, &reg);
}
static void write_sm1_sampler_dcls(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
{
enum hlsl_sampler_dim sampler_dim;
unsigned int i, count, reg_id;
struct hlsl_ir_var *var;
if (ctx->profile->major_version < 2)
return;
LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
{
if (!var->regs[HLSL_REGSET_SAMPLERS].allocated)
continue;
count = var->bind_count[HLSL_REGSET_SAMPLERS];
for (i = 0; i < count; ++i)
{
if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
{
sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim;
if (sampler_dim == HLSL_SAMPLER_DIM_GENERIC)
{
/* These can appear in sm4-style combined sample instructions. */
hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered.");
continue;
}
reg_id = var->regs[HLSL_REGSET_SAMPLERS].id + i;
write_sm1_sampler_dcl(ctx, buffer, reg_id, sampler_dim);
}
}
}
}
static void write_sm1_constant(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
const struct hlsl_ir_node *instr)
{
const struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
struct sm1_instruction sm1_instr =
{
.opcode = D3DSIO_MOV,
.dst.type = D3DSPR_TEMP,
.dst.reg = instr->reg.id,
.dst.writemask = instr->reg.writemask,
.has_dst = 1,
.srcs[0].type = D3DSPR_CONST,
.srcs[0].reg = constant->reg.id,
.srcs[0].swizzle = hlsl_swizzle_from_writemask(constant->reg.writemask),
.src_count = 1,
};
assert(instr->reg.allocated);
assert(constant->reg.allocated);
sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
write_sm1_instruction(ctx, buffer, &sm1_instr);
}
/* Write a unary operation as one instruction per component of the result.
 *
 * For each component index below the result type's dimx, the source and
 * destination write masks are narrowed to that single component with
 * hlsl_combine_writemasks() and a separate "opcode" instruction is written. */
static void write_sm1_per_component_unary_op(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_ir_node *instr, D3DSHADER_INSTRUCTION_OPCODE_TYPE opcode)
{
    const struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
    const struct hlsl_ir_node *operand = expr->operands[0].node;
    unsigned int component;

    for (component = 0; component < instr->data_type->dimx; ++component)
    {
        const unsigned int component_mask = 1u << component;
        struct hlsl_reg component_src = operand->reg;
        struct hlsl_reg component_dst = instr->reg;

        component_src.writemask = hlsl_combine_writemasks(component_src.writemask, component_mask);
        component_dst.writemask = hlsl_combine_writemasks(component_dst.writemask, component_mask);

        write_sm1_unary_op(ctx, buffer, opcode, &component_dst, &component_src, 0, 0);
    }
}
static void write_sm1_expr(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
{
struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
struct hlsl_ir_node *arg1 = expr->operands[0].node;
struct hlsl_ir_node *arg2 = expr->operands[1].node;
struct hlsl_ir_node *arg3 = expr->operands[2].node;
assert(instr->reg.allocated);
if (instr->data_type->base_type != HLSL_TYPE_FLOAT)
{
/* These need to be lowered. */
hlsl_fixme(ctx, &instr->loc, "SM1 non-float expression.");
return;
}
switch (expr->op)
{
case HLSL_OP1_ABS:
write_sm1_unary_op(ctx, buffer, D3DSIO_ABS, &instr->reg, &arg1->reg, 0, 0);
break;
case HLSL_OP1_DSX:
write_sm1_unary_op(ctx, buffer, D3DSIO_DSX, &instr->reg, &arg1->reg, 0, 0);
break;
case HLSL_OP1_DSY:
write_sm1_unary_op(ctx, buffer, D3DSIO_DSY, &instr->reg, &arg1->reg, 0, 0);
break;
case HLSL_OP1_EXP2:
write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_EXP);
break;
case HLSL_OP1_LOG2:
write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_LOG);
break;
case HLSL_OP1_NEG:
write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, D3DSPSM_NEG, 0);
break;
case HLSL_OP1_SAT:
write_sm1_unary_op(ctx, buffer, D3DSIO_MOV, &instr->reg, &arg1->reg, 0, D3DSPDM_SATURATE);
break;
case HLSL_OP1_RCP:
write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RCP);
break;
case HLSL_OP1_RSQ:
write_sm1_per_component_unary_op(ctx, buffer, instr, D3DSIO_RSQ);
break;
case HLSL_OP2_ADD:
write_sm1_binary_op(ctx, buffer, D3DSIO_ADD, &instr->reg, &arg1->reg, &arg2->reg);
break;
case HLSL_OP2_MAX:
write_sm1_binary_op(ctx, buffer, D3DSIO_MAX, &instr->reg, &arg1->reg, &arg2->reg);
break;
case HLSL_OP2_MIN:
write_sm1_binary_op(ctx, buffer, D3DSIO_MIN, &instr->reg, &arg1->reg, &arg2->reg);
break;
case HLSL_OP2_MUL:
write_sm1_binary_op(ctx, buffer, D3DSIO_MUL, &instr->reg, &arg1->reg, &arg2->reg);
break;
case HLSL_OP1_FRACT:
write_sm1_unary_op(ctx, buffer, D3DSIO_FRC, &instr->reg, &arg1->reg, D3DSPSM_NONE, 0);
break;
case HLSL_OP2_DOT:
switch (arg1->data_type->dimx)
{
case 4:
write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP4, &instr->reg, &arg1->reg, &arg2->reg);
break;
case 3:
write_sm1_binary_op_dot(ctx, buffer, D3DSIO_DP3, &instr->reg, &arg1->reg, &arg2->reg);
break;
default:
vkd3d_unreachable();
}
break;
case HLSL_OP3_DP2ADD:
write_sm1_dp2add(ctx, buffer, &instr->reg, &arg1->reg, &arg2->reg, &arg3->reg);
break;
default:
hlsl_fixme(ctx, &instr->loc, "SM1 \"%s\" expression.", debug_hlsl_expr_op(expr->op));
break;
}
}
static void write_sm1_jump(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
{
const struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
switch (jump->type)
{
case HLSL_IR_JUMP_DISCARD_NEG:
{
struct hlsl_reg *reg = &jump->condition.node->reg;
struct sm1_instruction instr =
{
.opcode = VKD3D_SM1_OP_TEXKILL,
.dst.type = D3DSPR_TEMP,
.dst.reg = reg->id,
.dst.writemask = reg->writemask,
.has_dst = 1,
};
write_sm1_instruction(ctx, buffer, &instr);
break;
}
default:
hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.\n", hlsl_jump_type_to_string(jump->type));
}
}
static void write_sm1_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, const struct hlsl_ir_node *instr)
{
const struct hlsl_ir_load *load = hlsl_ir_load(instr);
const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &load->src);
struct sm1_instruction sm1_instr =
{
.opcode = D3DSIO_MOV,
.dst.type = D3DSPR_TEMP,
.dst.reg = instr->reg.id,
.dst.writemask = instr->reg.writemask,
.has_dst = 1,
.srcs[0].type = D3DSPR_TEMP,
.srcs[0].reg = reg.id,
.srcs[0].swizzle = hlsl_swizzle_from_writemask(reg.writemask),
.src_count = 1,
};
assert(instr->reg.allocated);
if (load->src.var->is_uniform)
{
assert(reg.allocated);
sm1_instr.srcs[0].type = D3DSPR_CONST;
}
else if (load->src.var->is_input_semantic)
{
if (!hlsl_sm1_register_from_semantic(ctx, &load->src.var->semantic,
false, &sm1_instr.srcs[0].type, &sm1_instr.srcs[0].reg))
{
assert(reg.allocated);
sm1_instr.srcs[0].type = D3DSPR_INPUT;
sm1_instr.srcs[0].reg = reg.id;
}
else
sm1_instr.srcs[0].swizzle = hlsl_swizzle_from_writemask((1 << load->src.var->data_type->dimx) - 1);
}
sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
write_sm1_instruction(ctx, buffer, &sm1_instr);
}
static void write_sm1_resource_load(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
const struct hlsl_ir_node *instr)
{
const struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
struct hlsl_ir_node *coords = load->coords.node;
unsigned int sampler_offset, reg_id;
struct sm1_instruction sm1_instr;
sampler_offset = hlsl_offset_from_deref_safe(ctx, &load->resource);
reg_id = load->resource.var->regs[HLSL_REGSET_SAMPLERS].id + sampler_offset;
sm1_instr = (struct sm1_instruction)
{
.opcode = D3DSIO_TEX,
.dst.type = D3DSPR_TEMP,
.dst.reg = instr->reg.id,
.dst.writemask = instr->reg.writemask,
.has_dst = 1,
.srcs[0].type = D3DSPR_TEMP,
.srcs[0].reg = coords->reg.id,
.srcs[0].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL),
.srcs[1].type = D3DSPR_SAMPLER,
.srcs[1].reg = reg_id,
.srcs[1].swizzle = hlsl_swizzle_from_writemask(VKD3DSP_WRITEMASK_ALL),
.src_count = 2,
};
assert(instr->reg.allocated);
write_sm1_instruction(ctx, buffer, &sm1_instr);
}
static void write_sm1_store(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
const struct hlsl_ir_node *instr)
{
const struct hlsl_ir_store *store = hlsl_ir_store(instr);
const struct hlsl_ir_node *rhs = store->rhs.node;
const struct hlsl_reg reg = hlsl_reg_from_deref(ctx, &store->lhs);
struct sm1_instruction sm1_instr =
{
.opcode = D3DSIO_MOV,
.dst.type = D3DSPR_TEMP,
.dst.reg = reg.id,
.dst.writemask = hlsl_combine_writemasks(reg.writemask, store->writemask),
.has_dst = 1,
.srcs[0].type = D3DSPR_TEMP,
.srcs[0].reg = rhs->reg.id,
.srcs[0].swizzle = hlsl_swizzle_from_writemask(rhs->reg.writemask),
.src_count = 1,
};
if (store->lhs.var->data_type->class == HLSL_CLASS_MATRIX)
{
FIXME("Matrix writemasks need to be lowered.\n");
return;
}
if (store->lhs.var->is_output_semantic)
{
if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL && ctx->profile->major_version == 1)
{
sm1_instr.dst.type = D3DSPR_TEMP;
sm1_instr.dst.reg = 0;
}
else if (!hlsl_sm1_register_from_semantic(ctx, &store->lhs.var->semantic,
true, &sm1_instr.dst.type, &sm1_instr.dst.reg))
{
assert(reg.allocated);
sm1_instr.dst.type = D3DSPR_OUTPUT;
sm1_instr.dst.reg = reg.id;
}
else
sm1_instr.dst.writemask = (1u << store->lhs.var->data_type->dimx) - 1;
}
else
assert(reg.allocated);
sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
write_sm1_instruction(ctx, buffer, &sm1_instr);
}
static void write_sm1_swizzle(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
const struct hlsl_ir_node *instr)
{
const struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
const struct hlsl_ir_node *val = swizzle->val.node;
struct sm1_instruction sm1_instr =
{
.opcode = D3DSIO_MOV,
.dst.type = D3DSPR_TEMP,
.dst.reg = instr->reg.id,
.dst.writemask = instr->reg.writemask,
.has_dst = 1,
.srcs[0].type = D3DSPR_TEMP,
.srcs[0].reg = val->reg.id,
.srcs[0].swizzle = hlsl_combine_swizzles(hlsl_swizzle_from_writemask(val->reg.writemask),
swizzle->swizzle, instr->data_type->dimx),
.src_count = 1,
};
assert(instr->reg.allocated);
assert(val->reg.allocated);
sm1_map_src_swizzle(&sm1_instr.srcs[0], sm1_instr.dst.writemask);
write_sm1_instruction(ctx, buffer, &sm1_instr);
}
/* Walk the body of the entry function and write each instruction by
 * dispatching on its node type.
 *
 * Typed instructions are expected to be scalars or vectors. A matrix-typed
 * instruction is reported through hlsl_fixme() and skipped. An object-typed
 * instruction is reported as well, and then stops the walk altogether. Node
 * types without a writer are reported through hlsl_fixme(). */
static void write_sm1_instructions(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer,
        const struct hlsl_ir_function_decl *entry_func)
{
    const struct hlsl_ir_node *instr;

    LIST_FOR_EACH_ENTRY(instr, &entry_func->body.instrs, struct hlsl_ir_node, entry)
    {
        if (instr->data_type)
        {
            switch (instr->data_type->class)
            {
                case HLSL_CLASS_MATRIX:
                    /* These need to be lowered. */
                    hlsl_fixme(ctx, &instr->loc, "SM1 matrix expression.");
                    continue;

                case HLSL_CLASS_OBJECT:
                    hlsl_fixme(ctx, &instr->loc, "Object copy.");
                    /* Stop processing instructions; nothing follows the loop. */
                    return;

                default:
                    assert(instr->data_type->class == HLSL_CLASS_SCALAR
                            || instr->data_type->class == HLSL_CLASS_VECTOR);
                    break;
            }
        }

        switch (instr->type)
        {
            case HLSL_IR_CALL:
                vkd3d_unreachable();

            case HLSL_IR_CONSTANT:
                write_sm1_constant(ctx, buffer, instr);
                break;

            case HLSL_IR_EXPR:
                write_sm1_expr(ctx, buffer, instr);
                break;

            case HLSL_IR_JUMP:
                write_sm1_jump(ctx, buffer, instr);
                break;

            case HLSL_IR_LOAD:
                write_sm1_load(ctx, buffer, instr);
                break;

            case HLSL_IR_RESOURCE_LOAD:
                write_sm1_resource_load(ctx, buffer, instr);
                break;

            case HLSL_IR_STORE:
                write_sm1_store(ctx, buffer, instr);
                break;

            case HLSL_IR_SWIZZLE:
                write_sm1_swizzle(ctx, buffer, instr);
                break;

            default:
                hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
        }
    }
}
/* Serialise the compiled HLSL entry function as SM1 bytecode.
 *
 * The output is built in a local buffer: the version token, the output of
 * write_sm1_uniforms(), constant definitions, semantic and sampler
 * declarations, the instruction stream, and finally the END token.
 *
 * A non-zero buffer status overrides ctx->result. If ctx->result is zero
 * afterwards, the buffer's data and size are handed over to "out"; otherwise
 * the buffer is freed and "out" is left untouched.
 *
 * Returns ctx->result. */
int hlsl_sm1_write(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func, struct vkd3d_shader_code *out)
{
    struct vkd3d_bytecode_buffer buffer = {0};

    put_u32(&buffer, sm1_version(ctx->profile->type, ctx->profile->major_version, ctx->profile->minor_version));

    write_sm1_uniforms(ctx, &buffer, entry_func);

    write_sm1_constant_defs(ctx, &buffer);
    write_sm1_semantic_dcls(ctx, &buffer);
    write_sm1_sampler_dcls(ctx, &buffer);
    write_sm1_instructions(ctx, &buffer, entry_func);

    put_u32(&buffer, D3DSIO_END);

    /* A failure recorded by the buffer takes precedence over the current
     * compilation result. */
    if (buffer.status)
        ctx->result = buffer.status;

    if (ctx->result)
    {
        vkd3d_free(buffer.data);
        return ctx->result;
    }

    out->code = buffer.data;
    out->size = buffer.size;

    return ctx->result;
}