mirror of
				https://gitlab.winehq.org/wine/vkd3d.git
				synced 2025-09-12 18:50:22 -07:00 
			
		
		
		
	
		
			
				
	
	
		
			13844 lines
		
	
	
		
			490 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			13844 lines
		
	
	
		
			490 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
| /*
 | |
|  * HLSL optimization and code generation
 | |
|  *
 | |
|  * Copyright 2019-2020 Zebediah Figura for CodeWeavers
 | |
|  *
 | |
|  * This library is free software; you can redistribute it and/or
 | |
|  * modify it under the terms of the GNU Lesser General Public
 | |
|  * License as published by the Free Software Foundation; either
 | |
|  * version 2.1 of the License, or (at your option) any later version.
 | |
|  *
 | |
|  * This library is distributed in the hope that it will be useful,
 | |
|  * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | |
|  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | |
|  * Lesser General Public License for more details.
 | |
|  *
 | |
|  * You should have received a copy of the GNU Lesser General Public
 | |
|  * License along with this library; if not, write to the Free Software
 | |
|  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
 | |
|  */
 | |
| 
 | |
| #include "hlsl.h"
 | |
| #include "vkd3d_shader_private.h"
 | |
| #include "vkd3d_d3dcommon.h"
 | |
| #include <stdio.h>
 | |
| #include <math.h>
 | |
| 
 | |
| /* The shift that corresponds to the D3D_SIF_TEXTURE_COMPONENTS mask. */
 | |
| #define VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT 2
 | |
| 
 | |
| /* TODO: remove when no longer needed, only used for new_offset_instr_from_deref() */
 | |
| static struct hlsl_ir_node *new_offset_from_path_index(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_type *type, struct hlsl_ir_node *base_offset, struct hlsl_ir_node *idx,
 | |
|         enum hlsl_regset regset, unsigned int *offset_component, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     struct hlsl_ir_node *idx_offset = NULL;
 | |
|     struct hlsl_ir_node *c;
 | |
| 
 | |
|     switch (type->class)
 | |
|     {
 | |
|         case HLSL_CLASS_VECTOR:
 | |
|             if (idx->type != HLSL_IR_CONSTANT)
 | |
|             {
 | |
|                 hlsl_fixme(ctx, &idx->loc, "Non-constant vector addressing.");
 | |
|                 break;
 | |
|             }
 | |
|             *offset_component += hlsl_ir_constant(idx)->value.u[0].u;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_MATRIX:
 | |
|         {
 | |
|             idx_offset = idx;
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         case HLSL_CLASS_ARRAY:
 | |
|         {
 | |
|             unsigned int size = hlsl_type_get_array_element_reg_size(type->e.array.type, regset);
 | |
| 
 | |
|             if (regset == HLSL_REGSET_NUMERIC)
 | |
|             {
 | |
|                 VKD3D_ASSERT(size % 4 == 0);
 | |
|                 size /= 4;
 | |
|             }
 | |
| 
 | |
|             c = hlsl_block_add_uint_constant(ctx, block, size, loc);
 | |
| 
 | |
|             idx_offset = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, c, idx);
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         case HLSL_CLASS_STRUCT:
 | |
|         {
 | |
|             unsigned int field_idx = hlsl_ir_constant(idx)->value.u[0].u;
 | |
|             struct hlsl_struct_field *field = &type->e.record.fields[field_idx];
 | |
|             unsigned int field_offset = field->reg_offset[regset];
 | |
| 
 | |
|             if (regset == HLSL_REGSET_NUMERIC)
 | |
|             {
 | |
|                 VKD3D_ASSERT(*offset_component == 0);
 | |
|                 *offset_component = field_offset % 4;
 | |
|                 field_offset /= 4;
 | |
|             }
 | |
| 
 | |
|             idx_offset = hlsl_block_add_uint_constant(ctx, block, field_offset, loc);
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         default:
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
| 
 | |
|     if (idx_offset)
 | |
|         return hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, base_offset, idx_offset);
 | |
|     return base_offset;
 | |
| }
 | |
| 
 | |
| /* TODO: remove when no longer needed, only used for replace_deref_path_with_offset() */
 | |
| static struct hlsl_ir_node *new_offset_instr_from_deref(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         const struct hlsl_deref *deref, unsigned int *offset_component, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
 | |
|     struct hlsl_ir_node *offset;
 | |
|     struct hlsl_type *type;
 | |
|     unsigned int i;
 | |
| 
 | |
|     *offset_component = 0;
 | |
| 
 | |
|     hlsl_block_init(block);
 | |
| 
 | |
|     offset = hlsl_block_add_uint_constant(ctx, block, 0, loc);
 | |
| 
 | |
|     VKD3D_ASSERT(deref->var);
 | |
|     type = deref->var->data_type;
 | |
| 
 | |
|     for (i = 0; i < deref->path_len; ++i)
 | |
|     {
 | |
|         struct hlsl_block idx_block;
 | |
| 
 | |
|         hlsl_block_init(&idx_block);
 | |
|         offset = new_offset_from_path_index(ctx, &idx_block, type, offset,
 | |
|                 deref->path[i].node, regset, offset_component, loc);
 | |
|         hlsl_block_add_block(block, &idx_block);
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node);
 | |
|     }
 | |
| 
 | |
|     return offset;
 | |
| }
 | |
| 
 | |
| /* TODO: remove when no longer needed, only used for transform_deref_paths_into_offsets() */
 | |
| static bool replace_deref_path_with_offset(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
 | |
|         struct hlsl_ir_node *instr)
 | |
| {
 | |
|     unsigned int offset_component;
 | |
|     struct hlsl_ir_node *offset;
 | |
|     struct hlsl_block block;
 | |
|     struct hlsl_type *type;
 | |
| 
 | |
|     VKD3D_ASSERT(deref->var);
 | |
|     VKD3D_ASSERT(!hlsl_deref_is_lowered(deref));
 | |
| 
 | |
|     type = hlsl_deref_get_type(ctx, deref);
 | |
| 
 | |
|     /* Instructions that directly refer to structs or arrays (instead of single-register components)
 | |
|      * are removed later by dce. So it is not a problem to just cleanup their derefs. */
 | |
|     if (type->class == HLSL_CLASS_STRUCT || type->class == HLSL_CLASS_ARRAY)
 | |
|     {
 | |
|         hlsl_cleanup_deref(deref);
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     deref->data_type = type;
 | |
| 
 | |
|     offset = new_offset_instr_from_deref(ctx, &block, deref, &offset_component, &instr->loc);
 | |
|     list_move_before(&instr->entry, &block.instrs);
 | |
| 
 | |
|     hlsl_cleanup_deref(deref);
 | |
|     hlsl_src_from_node(&deref->rel_offset, offset);
 | |
|     deref->const_offset = offset_component;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool clean_constant_deref_offset_srcs(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
 | |
|         struct hlsl_ir_node *instr)
 | |
| {
 | |
|     if (deref->rel_offset.node && deref->rel_offset.node->type == HLSL_IR_CONSTANT)
 | |
|     {
 | |
|         enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
 | |
| 
 | |
|         if (regset == HLSL_REGSET_NUMERIC)
 | |
|             deref->const_offset += 4 * hlsl_ir_constant(deref->rel_offset.node)->value.u[0].u;
 | |
|         else
 | |
|             deref->const_offset += hlsl_ir_constant(deref->rel_offset.node)->value.u[0].u;
 | |
|         hlsl_src_remove(&deref->rel_offset);
 | |
|         return true;
 | |
|     }
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| 
 | |
| /* For a uniform variable, create a temp copy of it so, in case a value is
 | |
|  * stored to the uniform at some point the shader, all derefs can be diverted
 | |
|  * to this temp copy instead.
 | |
|  * Also, promote the uniform to an extern var. */
 | |
| static void prepend_uniform_copy(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_var *uniform)
 | |
| {
 | |
|     struct hlsl_ir_node *store;
 | |
|     struct hlsl_ir_load *load;
 | |
|     struct hlsl_ir_var *temp;
 | |
|     char *new_name;
 | |
| 
 | |
|     uniform->is_uniform = 1;
 | |
|     list_add_tail(&ctx->extern_vars, &uniform->extern_entry);
 | |
| 
 | |
|     if (!(new_name = hlsl_sprintf_alloc(ctx, "<temp-%s>", uniform->name)))
 | |
|         return;
 | |
| 
 | |
|     if (!(temp = hlsl_new_var(ctx, new_name, uniform->data_type,
 | |
|             &uniform->loc, NULL, uniform->storage_modifiers, NULL)))
 | |
|     {
 | |
|         vkd3d_free(new_name);
 | |
|         return;
 | |
|     }
 | |
|     list_add_before(&uniform->scope_entry, &temp->scope_entry);
 | |
| 
 | |
|     uniform->temp_copy = temp;
 | |
| 
 | |
|     if (!(load = hlsl_new_var_load(ctx, uniform, &uniform->loc)))
 | |
|         return;
 | |
|     list_add_head(&block->instrs, &load->node.entry);
 | |
| 
 | |
|     if (!(store = hlsl_new_simple_store(ctx, temp, &load->node)))
 | |
|         return;
 | |
|     list_add_after(&load->node.entry, &store->entry);
 | |
| }
 | |
| 
 | |
| /* If a uniform is written to at some point in the shader, all dereferences
 | |
|  * must point to the temp copy instead, which is what this pass does. */
 | |
| static bool divert_written_uniform_derefs_to_temp(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
 | |
|         struct hlsl_ir_node *instr)
 | |
| {
 | |
|     if (!deref->var->is_uniform || !deref->var->first_write)
 | |
|         return false;
 | |
| 
 | |
|     /* Skip derefs from instructions before first write so copies from the
 | |
|      * uniform to the temp are unaffected. */
 | |
|     if (instr->index < deref->var->first_write)
 | |
|         return false;
 | |
| 
 | |
|     VKD3D_ASSERT(deref->var->temp_copy);
 | |
| 
 | |
|     deref->var = deref->var->temp_copy;
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void validate_field_semantic(struct hlsl_ctx *ctx, struct hlsl_struct_field *field)
 | |
| {
 | |
|     if (!field->semantic.name && hlsl_is_numeric_type(hlsl_get_multiarray_element_type(field->type))
 | |
|             && !field->semantic.reported_missing)
 | |
|     {
 | |
|         hlsl_error(ctx, &field->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
 | |
|                 "Field '%s' is missing a semantic.", field->name);
 | |
|         field->semantic.reported_missing = true;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static enum hlsl_base_type base_type_get_semantic_equivalent(enum hlsl_base_type base)
 | |
| {
 | |
|     switch (base)
 | |
|     {
 | |
|         case HLSL_TYPE_BOOL:
 | |
|         case HLSL_TYPE_INT:
 | |
|         case HLSL_TYPE_MIN16UINT:
 | |
|         case HLSL_TYPE_UINT:
 | |
|             return HLSL_TYPE_UINT;
 | |
| 
 | |
|         case HLSL_TYPE_HALF:
 | |
|         case HLSL_TYPE_FLOAT:
 | |
|             return HLSL_TYPE_FLOAT;
 | |
| 
 | |
|         case HLSL_TYPE_DOUBLE:
 | |
|             return HLSL_TYPE_DOUBLE;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static bool types_are_semantic_equivalent(struct hlsl_ctx *ctx, const struct hlsl_type *type1,
 | |
|         const struct hlsl_type *type2)
 | |
| {
 | |
|     if (ctx->profile->major_version < 4)
 | |
|         return true;
 | |
| 
 | |
|     if (hlsl_type_is_primitive_array(type1))
 | |
|     {
 | |
|         return hlsl_type_is_primitive_array(type2)
 | |
|                 && type1->e.array.array_type == type2->e.array.array_type
 | |
|                 && type1->e.array.elements_count == type2->e.array.elements_count
 | |
|                 && types_are_semantic_equivalent(ctx, type1->e.array.type, type2->e.array.type);
 | |
|     }
 | |
| 
 | |
|     if (type1->e.numeric.dimx != type2->e.numeric.dimx)
 | |
|         return false;
 | |
| 
 | |
|     return base_type_get_semantic_equivalent(type1->e.numeric.type)
 | |
|             == base_type_get_semantic_equivalent(type2->e.numeric.type);
 | |
| }
 | |
| 
 | |
| static struct hlsl_ir_var *add_semantic_var(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
 | |
|         struct hlsl_ir_var *var, struct hlsl_type *type, uint32_t modifiers, struct hlsl_semantic *semantic,
 | |
|         uint32_t index, bool output, bool force_align, bool create, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     struct hlsl_semantic new_semantic;
 | |
|     struct hlsl_ir_var *ext_var;
 | |
|     const char *prefix;
 | |
|     char *new_name;
 | |
| 
 | |
|     if (hlsl_type_is_primitive_array(type))
 | |
|         prefix = type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT ? "outputpatch" : "inputprim";
 | |
|     else
 | |
|         prefix = output ? "output" : "input";
 | |
| 
 | |
|     if (!(new_name = hlsl_sprintf_alloc(ctx, "<%s-%s%u>", prefix, semantic->name, index)))
 | |
|         return NULL;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(ext_var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!ascii_strcasecmp(ext_var->name, new_name))
 | |
|         {
 | |
|             VKD3D_ASSERT(hlsl_type_is_primitive_array(ext_var->data_type)
 | |
|                     || ext_var->data_type->class <= HLSL_CLASS_VECTOR);
 | |
|             VKD3D_ASSERT(hlsl_type_is_primitive_array(type) || type->class <= HLSL_CLASS_VECTOR);
 | |
| 
 | |
|             vkd3d_free(new_name);
 | |
| 
 | |
|             if (!create)
 | |
|                 return ext_var;
 | |
| 
 | |
|             if (output)
 | |
|             {
 | |
|                 if (index >= semantic->reported_duplicated_output_next_index)
 | |
|                 {
 | |
|                     hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
 | |
|                             "Output semantic \"%s%u\" is used multiple times.", semantic->name, index);
 | |
|                     hlsl_note(ctx, &ext_var->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                             "First use of \"%s%u\" is here.", semantic->name, index);
 | |
|                     semantic->reported_duplicated_output_next_index = index + 1;
 | |
|                 }
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 if (index >= semantic->reported_duplicated_input_incompatible_next_index
 | |
|                         && !types_are_semantic_equivalent(ctx, ext_var->data_type, type))
 | |
|                 {
 | |
|                     hlsl_error(ctx, loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
 | |
|                             "Input semantic \"%s%u\" is used multiple times with incompatible types.",
 | |
|                             semantic->name, index);
 | |
|                     hlsl_note(ctx, &ext_var->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                             "First declaration of \"%s%u\" is here.", semantic->name, index);
 | |
|                     semantic->reported_duplicated_input_incompatible_next_index = index + 1;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             return ext_var;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     VKD3D_ASSERT(create);
 | |
| 
 | |
|     if (!(hlsl_clone_semantic(ctx, &new_semantic, semantic)))
 | |
|     {
 | |
|         vkd3d_free(new_name);
 | |
|         return NULL;
 | |
|     }
 | |
|     new_semantic.index = index;
 | |
|     if (!(ext_var = hlsl_new_var(ctx, new_name, type, loc, &new_semantic, modifiers, NULL)))
 | |
|     {
 | |
|         vkd3d_free(new_name);
 | |
|         hlsl_cleanup_semantic(&new_semantic);
 | |
|         return NULL;
 | |
|     }
 | |
|     if (output)
 | |
|         ext_var->is_output_semantic = 1;
 | |
|     else
 | |
|         ext_var->is_input_semantic = 1;
 | |
|     ext_var->is_param = var->is_param;
 | |
|     ext_var->force_align = force_align;
 | |
|     list_add_before(&var->scope_entry, &ext_var->scope_entry);
 | |
|     list_add_tail(&func->extern_vars, &ext_var->extern_entry);
 | |
| 
 | |
|     return ext_var;
 | |
| }
 | |
| 
 | |
| static uint32_t combine_field_storage_modifiers(uint32_t modifiers, uint32_t field_modifiers)
 | |
| {
 | |
|     field_modifiers |= modifiers;
 | |
| 
 | |
|     /* TODO: 'sample' modifier is not supported yet. */
 | |
| 
 | |
|     /* 'nointerpolation' always takes precedence, next the same is done for
 | |
|      * 'sample', remaining modifiers are combined. */
 | |
|     if (field_modifiers & HLSL_STORAGE_NOINTERPOLATION)
 | |
|     {
 | |
|         field_modifiers &= ~HLSL_INTERPOLATION_MODIFIERS_MASK;
 | |
|         field_modifiers |= HLSL_STORAGE_NOINTERPOLATION;
 | |
|     }
 | |
| 
 | |
|     return field_modifiers;
 | |
| }
 | |
| 
 | |
| static void prepend_input_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
 | |
|         struct hlsl_block *block, uint32_t prim_index, struct hlsl_ir_load *lhs,
 | |
|         uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
 | |
| {
 | |
|     struct hlsl_type *type = lhs->node.data_type, *vector_type_src, *vector_type_dst;
 | |
|     struct vkd3d_shader_location *loc = &lhs->node.loc;
 | |
|     struct hlsl_ir_var *var = lhs->src.var;
 | |
|     struct hlsl_ir_node *c;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (!hlsl_is_numeric_type(type))
 | |
|     {
 | |
|         struct vkd3d_string_buffer *string;
 | |
|         if (!(string = hlsl_type_to_string(ctx, type)))
 | |
|             return;
 | |
|         hlsl_fixme(ctx, &var->loc, "Input semantics for type %s.", string->buffer);
 | |
|         hlsl_release_string_buffer(ctx, string);
 | |
|     }
 | |
|     if (!semantic->name)
 | |
|         return;
 | |
| 
 | |
|     vector_type_dst = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type));
 | |
|     vector_type_src = vector_type_dst;
 | |
|     if (ctx->profile->major_version < 4 && ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
 | |
|         vector_type_src = hlsl_get_vector_type(ctx, type->e.numeric.type, 4);
 | |
| 
 | |
|     if (hlsl_type_major_size(type) > 1)
 | |
|         force_align = true;
 | |
| 
 | |
|     for (i = 0; i < hlsl_type_major_size(type); ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *cast;
 | |
|         struct hlsl_ir_var *input;
 | |
|         struct hlsl_ir_load *load;
 | |
| 
 | |
|         if (hlsl_type_is_primitive_array(var->data_type))
 | |
|         {
 | |
|             struct hlsl_type *prim_type_src;
 | |
|             struct hlsl_deref prim_deref;
 | |
|             struct hlsl_ir_node *idx;
 | |
| 
 | |
|             if (!(prim_type_src = hlsl_new_array_type(ctx, vector_type_src, var->data_type->e.array.elements_count,
 | |
|                     var->data_type->e.array.array_type)))
 | |
|                 return;
 | |
|             prim_type_src->modifiers = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK;
 | |
| 
 | |
|             if (!(input = add_semantic_var(ctx, func, var, prim_type_src,
 | |
|                     modifiers, semantic, semantic_index + i, false, force_align, true, loc)))
 | |
|                 return;
 | |
|             hlsl_init_simple_deref_from_var(&prim_deref, input);
 | |
| 
 | |
|             idx = hlsl_block_add_uint_constant(ctx, block, prim_index, &var->loc);
 | |
| 
 | |
|             if (!(load = hlsl_new_load_index(ctx, &prim_deref, idx, loc)))
 | |
|                 return;
 | |
|             hlsl_block_add_instr(block, &load->node);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             if (!(input = add_semantic_var(ctx, func, var, vector_type_src,
 | |
|                     modifiers, semantic, semantic_index + i, false, force_align, true, loc)))
 | |
|                 return;
 | |
| 
 | |
|             if (!(load = hlsl_new_var_load(ctx, input, &var->loc)))
 | |
|                 return;
 | |
|             hlsl_block_add_instr(block, &load->node);
 | |
|         }
 | |
| 
 | |
|         cast = hlsl_block_add_cast(ctx, block, &load->node, vector_type_dst, &var->loc);
 | |
| 
 | |
|         if (type->class == HLSL_CLASS_MATRIX)
 | |
|         {
 | |
|             c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc);
 | |
| 
 | |
|             hlsl_block_add_store_index(ctx, block, &lhs->src, c, cast, 0, &var->loc);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             VKD3D_ASSERT(i == 0);
 | |
| 
 | |
|             hlsl_block_add_store_index(ctx, block, &lhs->src, NULL, cast, 0, &var->loc);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void prepend_input_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
 | |
|         struct hlsl_block *block, uint32_t prim_index, struct hlsl_ir_load *lhs,
 | |
|         uint32_t modifiers, struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align)
 | |
| {
 | |
|     struct vkd3d_shader_location *loc = &lhs->node.loc;
 | |
|     struct hlsl_type *type = lhs->node.data_type;
 | |
|     struct hlsl_ir_var *var = lhs->src.var;
 | |
|     struct hlsl_ir_node *c;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT)
 | |
|     {
 | |
|         struct hlsl_ir_load *element_load;
 | |
|         struct hlsl_struct_field *field;
 | |
|         uint32_t elem_semantic_index;
 | |
| 
 | |
|         for (i = 0; i < hlsl_type_element_count(type); ++i)
 | |
|         {
 | |
|             uint32_t element_modifiers;
 | |
| 
 | |
|             if (type->class == HLSL_CLASS_ARRAY)
 | |
|             {
 | |
|                 elem_semantic_index = semantic_index
 | |
|                         + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4;
 | |
|                 element_modifiers = modifiers;
 | |
|                 force_align = true;
 | |
| 
 | |
|                 if (hlsl_type_is_primitive_array(type))
 | |
|                     prim_index = i;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 field = &type->e.record.fields[i];
 | |
|                 if (hlsl_type_is_resource(field->type))
 | |
|                 {
 | |
|                     hlsl_fixme(ctx, &field->loc, "Prepend uniform copies for resource components within structs.");
 | |
|                     continue;
 | |
|                 }
 | |
|                 validate_field_semantic(ctx, field);
 | |
|                 semantic = &field->semantic;
 | |
|                 elem_semantic_index = semantic->index;
 | |
|                 loc = &field->loc;
 | |
|                 element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers);
 | |
|                 force_align = (i == 0);
 | |
|             }
 | |
| 
 | |
|             c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc);
 | |
| 
 | |
|             /* This redundant load is expected to be deleted later by DCE. */
 | |
|             if (!(element_load = hlsl_new_load_index(ctx, &lhs->src, c, loc)))
 | |
|                 return;
 | |
|             hlsl_block_add_instr(block, &element_load->node);
 | |
| 
 | |
|             prepend_input_copy_recurse(ctx, func, block, prim_index, element_load,
 | |
|                     element_modifiers, semantic, elem_semantic_index, force_align);
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         prepend_input_copy(ctx, func, block, prim_index, lhs, modifiers, semantic, semantic_index, force_align);
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Split inputs into two variables representing the semantic and temp registers,
 | |
|  * and copy the former to the latter, so that writes to input variables work. */
 | |
| static void prepend_input_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var)
 | |
| {
 | |
|     struct hlsl_ir_load *load;
 | |
|     struct hlsl_block block;
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
| 
 | |
|     /* This redundant load is expected to be deleted later by DCE. */
 | |
|     if (!(load = hlsl_new_var_load(ctx, var, &var->loc)))
 | |
|         return;
 | |
|     hlsl_block_add_instr(&block, &load->node);
 | |
| 
 | |
|     prepend_input_copy_recurse(ctx, func, &block, 0, load, var->storage_modifiers,
 | |
|             &var->semantic, var->semantic.index, false);
 | |
| 
 | |
|     list_move_head(&func->body.instrs, &block.instrs);
 | |
| }
 | |
| 
 | |
| static void append_output_copy(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_function_decl *func, struct hlsl_ir_load *rhs, uint32_t modifiers,
 | |
|         struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create)
 | |
| {
 | |
|     struct hlsl_type *type = rhs->node.data_type, *vector_type;
 | |
|     struct vkd3d_shader_location *loc = &rhs->node.loc;
 | |
|     struct hlsl_ir_var *var = rhs->src.var;
 | |
|     struct hlsl_ir_node *c;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (!hlsl_is_numeric_type(type))
 | |
|     {
 | |
|         struct vkd3d_string_buffer *string;
 | |
|         if (!(string = hlsl_type_to_string(ctx, type)))
 | |
|             return;
 | |
|         hlsl_fixme(ctx, &var->loc, "Output semantics for type %s.", string->buffer);
 | |
|         hlsl_release_string_buffer(ctx, string);
 | |
|     }
 | |
|     if (!semantic->name)
 | |
|         return;
 | |
| 
 | |
|     vector_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type));
 | |
| 
 | |
|     if (hlsl_type_major_size(type) > 1)
 | |
|         force_align = true;
 | |
| 
 | |
|     for (i = 0; i < hlsl_type_major_size(type); ++i)
 | |
|     {
 | |
|         struct hlsl_ir_var *output;
 | |
|         struct hlsl_ir_node *load;
 | |
| 
 | |
|         if (!(output = add_semantic_var(ctx, func, var, vector_type,
 | |
|                 modifiers, semantic, semantic_index + i, true, force_align, create, loc)))
 | |
|             return;
 | |
| 
 | |
|         if (type->class == HLSL_CLASS_MATRIX)
 | |
|         {
 | |
|             c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc);
 | |
|             load = hlsl_block_add_load_index(ctx, block, &rhs->src, c, &var->loc);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             VKD3D_ASSERT(i == 0);
 | |
| 
 | |
|             load = hlsl_block_add_load_index(ctx, block, &rhs->src, NULL, &var->loc);
 | |
|         }
 | |
| 
 | |
|         hlsl_block_add_simple_store(ctx, block, output, load);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void append_output_copy_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_function_decl *func, const struct hlsl_type *type, struct hlsl_ir_load *rhs, uint32_t modifiers,
 | |
|         struct hlsl_semantic *semantic, uint32_t semantic_index, bool force_align, bool create)
 | |
| {
 | |
|     struct vkd3d_shader_location *loc = &rhs->node.loc;
 | |
|     struct hlsl_ir_var *var = rhs->src.var;
 | |
|     struct hlsl_ir_node *c;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_ARRAY || type->class == HLSL_CLASS_STRUCT)
 | |
|     {
 | |
|         for (i = 0; i < hlsl_type_element_count(type); ++i)
 | |
|         {
 | |
|             uint32_t element_modifiers, elem_semantic_index;
 | |
|             const struct hlsl_type *element_type;
 | |
|             struct hlsl_ir_load *element_load;
 | |
|             struct hlsl_struct_field *field;
 | |
| 
 | |
|             if (type->class == HLSL_CLASS_ARRAY)
 | |
|             {
 | |
|                 elem_semantic_index = semantic_index
 | |
|                         + i * hlsl_type_get_array_element_reg_size(type->e.array.type, HLSL_REGSET_NUMERIC) / 4;
 | |
|                 element_type = type->e.array.type;
 | |
|                 element_modifiers = modifiers;
 | |
|                 force_align = true;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 field = &type->e.record.fields[i];
 | |
|                 if (hlsl_type_is_resource(field->type))
 | |
|                     continue;
 | |
|                 validate_field_semantic(ctx, field);
 | |
|                 semantic = &field->semantic;
 | |
|                 elem_semantic_index = semantic->index;
 | |
|                 loc = &field->loc;
 | |
|                 element_type = field->type;
 | |
|                 element_modifiers = combine_field_storage_modifiers(modifiers, field->storage_modifiers);
 | |
|                 force_align = (i == 0);
 | |
|             }
 | |
| 
 | |
|             c = hlsl_block_add_uint_constant(ctx, block, i, &var->loc);
 | |
| 
 | |
|             if (!(element_load = hlsl_new_load_index(ctx, &rhs->src, c, loc)))
 | |
|                 return;
 | |
|             hlsl_block_add_instr(block, &element_load->node);
 | |
| 
 | |
|             append_output_copy_recurse(ctx, block, func, element_type, element_load, element_modifiers, semantic,
 | |
|                     elem_semantic_index, force_align, create);
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         append_output_copy(ctx, block, func, rhs, modifiers, semantic, semantic_index, force_align, create);
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Split outputs into two variables representing the temp and semantic
 | |
|  * registers, and copy the former to the latter, so that reads from output
 | |
|  * variables work. */
 | |
| static void append_output_var_copy(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, struct hlsl_ir_var *var)
 | |
| {
 | |
|     struct hlsl_ir_load *load;
 | |
| 
 | |
|     /* This redundant load is expected to be deleted later by DCE. */
 | |
|     if (!(load = hlsl_new_var_load(ctx, var, &var->loc)))
 | |
|         return;
 | |
|     hlsl_block_add_instr(&func->body, &load->node);
 | |
| 
 | |
|     append_output_copy_recurse(ctx, &func->body, func, var->data_type, load, var->storage_modifiers,
 | |
|             &var->semantic, var->semantic.index, false, true);
 | |
| }
 | |
| 
 | |
| bool hlsl_transform_ir(struct hlsl_ctx *ctx, bool (*func)(struct hlsl_ctx *ctx, struct hlsl_ir_node *, void *),
 | |
|         struct hlsl_block *block, void *context)
 | |
| {
 | |
|     struct hlsl_ir_node *instr, *next;
 | |
|     bool progress = false;
 | |
| 
 | |
|     if (ctx->result)
 | |
|         return false;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->type == HLSL_IR_IF)
 | |
|         {
 | |
|             struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
| 
 | |
|             progress |= hlsl_transform_ir(ctx, func, &iff->then_block, context);
 | |
|             progress |= hlsl_transform_ir(ctx, func, &iff->else_block, context);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_LOOP)
 | |
|         {
 | |
|             progress |= hlsl_transform_ir(ctx, func, &hlsl_ir_loop(instr)->body, context);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_SWITCH)
 | |
|         {
 | |
|             struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|             struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|             LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|             {
 | |
|                 progress |= hlsl_transform_ir(ctx, func, &c->body, context);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         progress |= func(ctx, instr, context);
 | |
|     }
 | |
| 
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| typedef bool (*PFN_lower_func)(struct hlsl_ctx *, struct hlsl_ir_node *, struct hlsl_block *);
 | |
| 
 | |
| static bool call_lower_func(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     PFN_lower_func func = context;
 | |
|     struct hlsl_block block;
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
|     if (func(ctx, instr, &block))
 | |
|     {
 | |
|         struct hlsl_ir_node *replacement = LIST_ENTRY(list_tail(&block.instrs), struct hlsl_ir_node, entry);
 | |
| 
 | |
|         list_move_before(&instr->entry, &block.instrs);
 | |
|         hlsl_replace_node(instr, replacement);
 | |
|         return true;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         hlsl_block_cleanup(&block);
 | |
|         return false;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Specific form of transform_ir() for passes which convert a single instruction
 | |
|  * to a block of one or more instructions. This helper takes care of setting up
 | |
|  * the block and calling hlsl_replace_node_with_block(). */
 | |
| static bool lower_ir(struct hlsl_ctx *ctx, PFN_lower_func func, struct hlsl_block *block)
 | |
| {
 | |
|     return hlsl_transform_ir(ctx, call_lower_func, block, func);
 | |
| }
 | |
| 
 | |
| static bool transform_instr_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     bool res;
 | |
|     bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *) = context;
 | |
| 
 | |
|     switch(instr->type)
 | |
|     {
 | |
|         case HLSL_IR_LOAD:
 | |
|             res = func(ctx, &hlsl_ir_load(instr)->src, instr);
 | |
|             return res;
 | |
| 
 | |
|         case HLSL_IR_STORE:
 | |
|             res = func(ctx, &hlsl_ir_store(instr)->lhs, instr);
 | |
|             return res;
 | |
| 
 | |
|         case HLSL_IR_RESOURCE_LOAD:
 | |
|             res = func(ctx, &hlsl_ir_resource_load(instr)->resource, instr);
 | |
|             if (hlsl_ir_resource_load(instr)->sampler.var)
 | |
|                 res |= func(ctx, &hlsl_ir_resource_load(instr)->sampler, instr);
 | |
|             return res;
 | |
| 
 | |
|         case HLSL_IR_RESOURCE_STORE:
 | |
|             res = func(ctx, &hlsl_ir_resource_store(instr)->resource, instr);
 | |
|             return res;
 | |
| 
 | |
|         case HLSL_IR_INTERLOCKED:
 | |
|             res = func(ctx, &hlsl_ir_interlocked(instr)->dst, instr);
 | |
|             return res;
 | |
| 
 | |
|         default:
 | |
|             return false;
 | |
|     }
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool transform_derefs(struct hlsl_ctx *ctx,
 | |
|         bool (*func)(struct hlsl_ctx *ctx, struct hlsl_deref *, struct hlsl_ir_node *),
 | |
|         struct hlsl_block *block)
 | |
| {
 | |
|     return hlsl_transform_ir(ctx, transform_instr_derefs, block, func);
 | |
| }
 | |
| 
 | |
| struct recursive_call_ctx
 | |
| {
 | |
|     const struct hlsl_ir_function_decl **backtrace;
 | |
|     size_t count, capacity;
 | |
| };
 | |
| 
 | |
| static bool find_recursive_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct recursive_call_ctx *call_ctx = context;
 | |
|     struct hlsl_ir_function_decl *decl;
 | |
|     const struct hlsl_ir_call *call;
 | |
|     size_t i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_CALL)
 | |
|         return false;
 | |
|     call = hlsl_ir_call(instr);
 | |
|     decl = call->decl;
 | |
| 
 | |
|     for (i = 0; i < call_ctx->count; ++i)
 | |
|     {
 | |
|         if (call_ctx->backtrace[i] == decl)
 | |
|         {
 | |
|             hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL,
 | |
|                     "Recursive call to \"%s\".", decl->func->name);
 | |
|             /* Native returns E_NOTIMPL instead of E_FAIL here. */
 | |
|             ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED;
 | |
|             return false;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (!hlsl_array_reserve(ctx, (void **)&call_ctx->backtrace, &call_ctx->capacity,
 | |
|             call_ctx->count + 1, sizeof(*call_ctx->backtrace)))
 | |
|         return false;
 | |
|     call_ctx->backtrace[call_ctx->count++] = decl;
 | |
| 
 | |
|     hlsl_transform_ir(ctx, find_recursive_calls, &decl->body, call_ctx);
 | |
| 
 | |
|     --call_ctx->count;
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void insert_early_return_break(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_function_decl *func, struct hlsl_ir_node *cf_instr)
 | |
| {
 | |
|     struct hlsl_block then_block;
 | |
|     struct hlsl_ir_load *load;
 | |
|     struct hlsl_ir_node *iff;
 | |
| 
 | |
|     hlsl_block_init(&then_block);
 | |
| 
 | |
|     if (!(load = hlsl_new_var_load(ctx, func->early_return_var, &cf_instr->loc)))
 | |
|         return;
 | |
|     list_add_after(&cf_instr->entry, &load->node.entry);
 | |
| 
 | |
|     hlsl_block_add_jump(ctx, &then_block, HLSL_IR_JUMP_BREAK, NULL, &cf_instr->loc);
 | |
| 
 | |
|     if (!(iff = hlsl_new_if(ctx, &load->node, &then_block, NULL, &cf_instr->loc)))
 | |
|         return;
 | |
|     list_add_after(&load->node.entry, &iff->entry);
 | |
| }
 | |
| 
 | |
| /* Remove HLSL_IR_JUMP_RETURN calls by altering subsequent control flow. */
 | |
| static bool lower_return(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
 | |
|         struct hlsl_block *block, bool in_loop)
 | |
| {
 | |
|     struct hlsl_ir_node *return_instr = NULL, *cf_instr = NULL;
 | |
|     struct hlsl_ir_node *instr, *next;
 | |
|     bool has_early_return = false;
 | |
| 
 | |
|     /* SM1 has no function calls. SM4 does, but native d3dcompiler inlines
 | |
|      * everything anyway. We are safest following suit.
 | |
|      *
 | |
|      * The basic idea is to keep track of whether the function has executed an
 | |
|      * early return in a synthesized boolean variable (func->early_return_var)
 | |
|      * and guard all code after the return on that variable being false. In the
 | |
|      * case of loops we also replace the return with a break.
 | |
|      *
 | |
|      * The following algorithm loops over instructions in a block, recursing
 | |
|      * into inferior CF blocks, until it hits one of the following two things:
 | |
|      *
 | |
|      * - A return statement. In this case, we remove everything after the return
 | |
|      *   statement in this block. We have to stop and do this in a separate
 | |
|      *   loop, because instructions must be deleted in reverse order (due to
 | |
|      *   def-use chains.)
 | |
|      *
 | |
|      *   If we're inside of a loop CF block, we can instead just turn the
 | |
|      *   return into a break, which offers the right semantics—except that it
 | |
|      *   won't break out of nested loops.
 | |
|      *
 | |
|      * - A CF block which contains a return statement. After calling
 | |
|      *   lower_return() on the CF block body, we stop, pull out everything after
 | |
|      *   the CF instruction, shove it into an if block, and then lower that if
 | |
|      *   block.
 | |
|      *
 | |
|      *   (We could return a "did we make progress" boolean like hlsl_transform_ir()
 | |
|      *   and run this pass multiple times, but we already know the only block
 | |
|      *   that still needs to be addressed, so there's not much point.)
 | |
|      *
 | |
|      *   If we're inside of a loop CF block, we again do things differently. We
 | |
|      *   already turned any returns into breaks. If the block we just processed
 | |
|      *   was conditional, then "break" did our work for us. If it was a loop,
 | |
|      *   we need to propagate that break to the outer loop.
 | |
|      *
 | |
|      * We return true if there was an early return anywhere in the block we just
 | |
|      * processed (including CF contained inside that block).
 | |
|      */
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->type == HLSL_IR_CALL)
 | |
|         {
 | |
|             struct hlsl_ir_call *call = hlsl_ir_call(instr);
 | |
| 
 | |
|             lower_return(ctx, call->decl, &call->decl->body, false);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_IF)
 | |
|         {
 | |
|             struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
| 
 | |
|             has_early_return |= lower_return(ctx, func, &iff->then_block, in_loop);
 | |
|             has_early_return |= lower_return(ctx, func, &iff->else_block, in_loop);
 | |
| 
 | |
|             if (has_early_return)
 | |
|             {
 | |
|                 /* If we're in a loop, we don't need to do anything here. We
 | |
|                  * turned the return into a break, and that will already skip
 | |
|                  * anything that comes after this "if" block. */
 | |
|                 if (!in_loop)
 | |
|                 {
 | |
|                     cf_instr = instr;
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_LOOP)
 | |
|         {
 | |
|             has_early_return |= lower_return(ctx, func, &hlsl_ir_loop(instr)->body, true);
 | |
| 
 | |
|             if (has_early_return)
 | |
|             {
 | |
|                 if (in_loop)
 | |
|                 {
 | |
|                     /* "instr" is a nested loop. "return" breaks out of all
 | |
|                      * loops, so break out of this one too now. */
 | |
|                     insert_early_return_break(ctx, func, instr);
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     cf_instr = instr;
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_JUMP)
 | |
|         {
 | |
|             struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
 | |
|             struct hlsl_ir_node *constant, *store;
 | |
| 
 | |
|             if (jump->type == HLSL_IR_JUMP_RETURN)
 | |
|             {
 | |
|                 if (!(constant = hlsl_new_bool_constant(ctx, true, &jump->node.loc)))
 | |
|                     return false;
 | |
|                 list_add_before(&jump->node.entry, &constant->entry);
 | |
| 
 | |
|                 if (!(store = hlsl_new_simple_store(ctx, func->early_return_var, constant)))
 | |
|                     return false;
 | |
|                 list_add_after(&constant->entry, &store->entry);
 | |
| 
 | |
|                 has_early_return = true;
 | |
|                 if (in_loop)
 | |
|                 {
 | |
|                     jump->type = HLSL_IR_JUMP_BREAK;
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     return_instr = instr;
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_SWITCH)
 | |
|         {
 | |
|             struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|             struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|             LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|             {
 | |
|                 has_early_return |= lower_return(ctx, func, &c->body, true);
 | |
|             }
 | |
| 
 | |
|             if (has_early_return)
 | |
|             {
 | |
|                 if (in_loop)
 | |
|                 {
 | |
|                     /* For a 'switch' nested in a loop append a break after the 'switch'. */
 | |
|                     insert_early_return_break(ctx, func, instr);
 | |
|                 }
 | |
|                 else
 | |
|                 {
 | |
|                     cf_instr = instr;
 | |
|                     break;
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (return_instr)
 | |
|     {
 | |
|         /* If we're in a loop, we should have used "break" instead. */
 | |
|         VKD3D_ASSERT(!in_loop);
 | |
| 
 | |
|         /* Iterate in reverse, to avoid use-after-free when unlinking sources from
 | |
|          * the "uses" list. */
 | |
|         LIST_FOR_EACH_ENTRY_SAFE_REV(instr, next, &block->instrs, struct hlsl_ir_node, entry)
 | |
|         {
 | |
|             list_remove(&instr->entry);
 | |
|             hlsl_free_instr(instr);
 | |
| 
 | |
|             /* Yes, we just freed it, but we're comparing pointers. */
 | |
|             if (instr == return_instr)
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
|     else if (cf_instr)
 | |
|     {
 | |
|         struct list *tail = list_tail(&block->instrs);
 | |
|         struct hlsl_ir_node *not, *load;
 | |
|         struct hlsl_block then_block;
 | |
| 
 | |
|         /* If we're in a loop, we should have used "break" instead. */
 | |
|         VKD3D_ASSERT(!in_loop);
 | |
| 
 | |
|         if (tail == &cf_instr->entry)
 | |
|             return has_early_return;
 | |
| 
 | |
|         hlsl_block_init(&then_block);
 | |
|         list_move_slice_tail(&then_block.instrs, list_next(&block->instrs, &cf_instr->entry), tail);
 | |
|         lower_return(ctx, func, &then_block, in_loop);
 | |
| 
 | |
|         load = hlsl_block_add_simple_load(ctx, block, func->early_return_var, &cf_instr->loc);
 | |
|         not = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_LOGIC_NOT, load, &cf_instr->loc);
 | |
|         hlsl_block_add_if(ctx, block, not, &then_block, NULL, &cf_instr->loc);
 | |
|     }
 | |
| 
 | |
|     return has_early_return;
 | |
| }
 | |
| 
 | |
| /* Remove HLSL_IR_CALL instructions by inlining them. */
 | |
| static bool lower_calls(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     const struct hlsl_ir_function_decl *decl;
 | |
|     struct hlsl_ir_call *call;
 | |
|     struct hlsl_block block;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_CALL)
 | |
|         return false;
 | |
|     call = hlsl_ir_call(instr);
 | |
|     decl = call->decl;
 | |
| 
 | |
|     if (!decl->has_body)
 | |
|         hlsl_error(ctx, &call->node.loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
 | |
|                 "Function \"%s\" is not defined.", decl->func->name);
 | |
| 
 | |
|     if (!hlsl_clone_block(ctx, &block, &decl->body))
 | |
|         return false;
 | |
|     list_move_before(&call->node.entry, &block.instrs);
 | |
| 
 | |
|     list_remove(&call->node.entry);
 | |
|     hlsl_free_instr(&call->node);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static struct hlsl_ir_node *add_zero_mipmap_level(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_node *index, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     unsigned int dim_count = index->data_type->e.numeric.dimx;
 | |
|     struct hlsl_deref coords_deref;
 | |
|     struct hlsl_ir_var *coords;
 | |
|     struct hlsl_ir_node *zero;
 | |
| 
 | |
|     VKD3D_ASSERT(dim_count < 4);
 | |
| 
 | |
|     if (!(coords = hlsl_new_synthetic_var(ctx, "coords",
 | |
|             hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, dim_count + 1), loc)))
 | |
|         return NULL;
 | |
| 
 | |
|     hlsl_init_simple_deref_from_var(&coords_deref, coords);
 | |
|     hlsl_block_add_store_index(ctx, block, &coords_deref, NULL, index, (1u << dim_count) - 1, loc);
 | |
| 
 | |
|     zero = hlsl_block_add_uint_constant(ctx, block, 0, loc);
 | |
|     hlsl_block_add_store_index(ctx, block, &coords_deref, NULL, zero, 1u << dim_count, loc);
 | |
| 
 | |
|     return hlsl_block_add_simple_load(ctx, block, coords, loc);
 | |
| }
 | |
| 
 | |
| static bool lower_complex_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     unsigned int src_comp_count, dst_comp_count;
 | |
|     struct hlsl_type *src_type, *dst_type;
 | |
|     struct hlsl_deref var_deref;
 | |
|     bool broadcast, matrix_cast;
 | |
|     struct hlsl_ir_node *arg;
 | |
|     struct hlsl_ir_var *var;
 | |
|     unsigned int dst_idx;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     if (hlsl_ir_expr(instr)->op != HLSL_OP1_CAST)
 | |
|         return false;
 | |
| 
 | |
|     arg = hlsl_ir_expr(instr)->operands[0].node;
 | |
|     dst_type = instr->data_type;
 | |
|     src_type = arg->data_type;
 | |
| 
 | |
|     if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
| 
 | |
|     src_comp_count = hlsl_type_component_count(src_type);
 | |
|     dst_comp_count = hlsl_type_component_count(dst_type);
 | |
|     broadcast = hlsl_is_numeric_type(src_type) && src_type->e.numeric.dimx == 1 && src_type->e.numeric.dimy == 1;
 | |
|     matrix_cast = !broadcast && dst_comp_count != src_comp_count
 | |
|             && src_type->class == HLSL_CLASS_MATRIX && dst_type->class == HLSL_CLASS_MATRIX;
 | |
| 
 | |
|     VKD3D_ASSERT(src_comp_count >= dst_comp_count || broadcast);
 | |
|     if (matrix_cast)
 | |
|     {
 | |
|         VKD3D_ASSERT(dst_type->e.numeric.dimx <= src_type->e.numeric.dimx);
 | |
|         VKD3D_ASSERT(dst_type->e.numeric.dimy <= src_type->e.numeric.dimy);
 | |
|     }
 | |
| 
 | |
|     if (!(var = hlsl_new_synthetic_var(ctx, "cast", dst_type, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_init_simple_deref_from_var(&var_deref, var);
 | |
| 
 | |
|     for (dst_idx = 0; dst_idx < dst_comp_count; ++dst_idx)
 | |
|     {
 | |
|         struct hlsl_ir_node *component_load, *cast;
 | |
|         struct hlsl_type *dst_comp_type;
 | |
|         unsigned int src_idx;
 | |
| 
 | |
|         if (broadcast)
 | |
|         {
 | |
|             src_idx = 0;
 | |
|         }
 | |
|         else if (matrix_cast)
 | |
|         {
 | |
|             unsigned int x = dst_idx % dst_type->e.numeric.dimx, y = dst_idx / dst_type->e.numeric.dimx;
 | |
| 
 | |
|             src_idx = y * src_type->e.numeric.dimx + x;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             src_idx = dst_idx;
 | |
|         }
 | |
| 
 | |
|         dst_comp_type = hlsl_type_get_component_type(ctx, dst_type, dst_idx);
 | |
|         component_load = hlsl_add_load_component(ctx, block, arg, src_idx, &arg->loc);
 | |
|         cast = hlsl_block_add_cast(ctx, block, component_load, dst_comp_type, &arg->loc);
 | |
| 
 | |
|         hlsl_block_add_store_component(ctx, block, &var_deref, dst_idx, cast);
 | |
|     }
 | |
| 
 | |
|     hlsl_block_add_simple_load(ctx, block, var, &instr->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* hlsl_ir_swizzle nodes that directly point to a matrix value are only a parse-time construct that
 | |
|  * represents matrix swizzles (e.g. mat._m01_m23) before we know if they will be used in the lhs of
 | |
|  * an assignment or as a value made from different components of the matrix. The former cases should
 | |
|  * have already been split into several separate assignments, but the latter are lowered by this
 | |
|  * pass. */
 | |
| static bool lower_matrix_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_swizzle *swizzle;
 | |
|     struct hlsl_deref var_deref;
 | |
|     struct hlsl_type *matrix_type;
 | |
|     struct hlsl_ir_var *var;
 | |
|     unsigned int k, i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_SWIZZLE)
 | |
|         return false;
 | |
|     swizzle = hlsl_ir_swizzle(instr);
 | |
|     matrix_type = swizzle->val.node->data_type;
 | |
|     if (matrix_type->class != HLSL_CLASS_MATRIX)
 | |
|         return false;
 | |
| 
 | |
|     if (!(var = hlsl_new_synthetic_var(ctx, "matrix-swizzle", instr->data_type, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_init_simple_deref_from_var(&var_deref, var);
 | |
| 
 | |
|     for (i = 0; i < instr->data_type->e.numeric.dimx; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *load;
 | |
| 
 | |
|         k = swizzle->u.matrix.components[i].y * matrix_type->e.numeric.dimx + swizzle->u.matrix.components[i].x;
 | |
| 
 | |
|         load = hlsl_add_load_component(ctx, block, swizzle->val.node, k, &instr->loc);
 | |
|         hlsl_block_add_store_component(ctx, block, &var_deref, i, load);
 | |
|     }
 | |
| 
 | |
|     hlsl_block_add_simple_load(ctx, block, var, &instr->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* hlsl_ir_index nodes are a parse-time construct used to represent array indexing and struct
 | |
|  * record access before knowing if they will be used in the lhs of an assignment --in which case
 | |
|  * they are lowered into a deref-- or as the load of an element within a larger value.
 | |
|  * For the latter case, this pass takes care of lowering hlsl_ir_indexes into individual
 | |
|  * hlsl_ir_loads, or individual hlsl_ir_resource_loads, in case the indexing is a
 | |
|  * resource access. */
 | |
| static bool lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_deref var_deref;
 | |
|     struct hlsl_ir_index *index;
 | |
|     struct hlsl_ir_load *load;
 | |
|     struct hlsl_ir_node *val;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_INDEX)
 | |
|         return false;
 | |
|     index = hlsl_ir_index(instr);
 | |
|     val = index->val.node;
 | |
| 
 | |
|     if (hlsl_index_is_resource_access(index))
 | |
|     {
 | |
|         unsigned int dim_count = hlsl_sampler_dim_count(val->data_type->sampler_dim);
 | |
|         struct hlsl_ir_node *coords = index->idx.node;
 | |
|         struct hlsl_resource_load_params params = {0};
 | |
| 
 | |
|         VKD3D_ASSERT(coords->data_type->class == HLSL_CLASS_VECTOR);
 | |
|         VKD3D_ASSERT(coords->data_type->e.numeric.type == HLSL_TYPE_UINT);
 | |
|         VKD3D_ASSERT(coords->data_type->e.numeric.dimx == dim_count);
 | |
| 
 | |
|         if (!(coords = add_zero_mipmap_level(ctx, block, coords, &instr->loc)))
 | |
|             return false;
 | |
| 
 | |
|         params.type = HLSL_RESOURCE_LOAD;
 | |
|         params.resource = val;
 | |
|         params.coords = coords;
 | |
|         params.format = val->data_type->e.resource.format;
 | |
|         hlsl_block_add_resource_load(ctx, block, ¶ms, &instr->loc);
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     if (!(var = hlsl_new_synthetic_var(ctx, "index-val", val->data_type, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_init_simple_deref_from_var(&var_deref, var);
 | |
| 
 | |
|     hlsl_block_add_simple_store(ctx, block, var, val);
 | |
| 
 | |
|     if (hlsl_index_is_noncontiguous(index))
 | |
|     {
 | |
|         struct hlsl_ir_node *mat = index->val.node;
 | |
|         struct hlsl_deref row_deref;
 | |
|         unsigned int i;
 | |
| 
 | |
|         VKD3D_ASSERT(!hlsl_type_is_row_major(mat->data_type));
 | |
| 
 | |
|         if (!(var = hlsl_new_synthetic_var(ctx, "row", instr->data_type, &instr->loc)))
 | |
|             return false;
 | |
|         hlsl_init_simple_deref_from_var(&row_deref, var);
 | |
| 
 | |
|         for (i = 0; i < mat->data_type->e.numeric.dimx; ++i)
 | |
|         {
 | |
|             struct hlsl_ir_node *c;
 | |
| 
 | |
|             c = hlsl_block_add_uint_constant(ctx, block, i, &instr->loc);
 | |
| 
 | |
|             if (!(load = hlsl_new_load_index(ctx, &var_deref, c, &instr->loc)))
 | |
|                 return false;
 | |
|             hlsl_block_add_instr(block, &load->node);
 | |
| 
 | |
|             if (!(load = hlsl_new_load_index(ctx, &load->src, index->idx.node, &instr->loc)))
 | |
|                 return false;
 | |
|             hlsl_block_add_instr(block, &load->node);
 | |
| 
 | |
|             hlsl_block_add_store_index(ctx, block, &row_deref, c, &load->node, 0, &instr->loc);
 | |
|         }
 | |
| 
 | |
|         hlsl_block_add_simple_load(ctx, block, var, &instr->loc);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         hlsl_block_add_load_index(ctx, block, &var_deref, index->idx.node, &instr->loc);
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower casts from vec1 to vecN to swizzles. */
 | |
| static bool lower_broadcasts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     const struct hlsl_type *src_type, *dst_type;
 | |
|     struct hlsl_type *dst_scalar_type;
 | |
|     struct hlsl_ir_expr *cast;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     cast = hlsl_ir_expr(instr);
 | |
|     if (cast->op != HLSL_OP1_CAST)
 | |
|         return false;
 | |
|     src_type = cast->operands[0].node->data_type;
 | |
|     dst_type = cast->node.data_type;
 | |
| 
 | |
|     if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR && src_type->e.numeric.dimx == 1)
 | |
|     {
 | |
|         struct hlsl_ir_node *new_cast;
 | |
| 
 | |
|         dst_scalar_type = hlsl_get_scalar_type(ctx, dst_type->e.numeric.type);
 | |
|         /* We need to preserve the cast since it might be doing more than just
 | |
|          * turning the scalar into a vector. */
 | |
|         new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_scalar_type, &cast->node.loc);
 | |
| 
 | |
|         if (dst_type->e.numeric.dimx != 1)
 | |
|             hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X),
 | |
|                     dst_type->e.numeric.dimx, new_cast, &cast->node.loc);
 | |
| 
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| /* Allocate a unique, ordered index to each instruction, which will be used for
 | |
|  * copy propagation and computing liveness ranges.
 | |
|  * Index 0 means unused, so start at 1. */
 | |
| static unsigned int index_instructions(struct hlsl_block *block, unsigned int index)
 | |
| {
 | |
|     struct hlsl_ir_node *instr;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         instr->index = index++;
 | |
| 
 | |
|         if (instr->type == HLSL_IR_IF)
 | |
|         {
 | |
|             struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
|             index = index_instructions(&iff->then_block, index);
 | |
|             index = index_instructions(&iff->else_block, index);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_LOOP)
 | |
|         {
 | |
|             index = index_instructions(&hlsl_ir_loop(instr)->body, index);
 | |
|             hlsl_ir_loop(instr)->next_index = index;
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_SWITCH)
 | |
|         {
 | |
|             struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|             struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|             LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|             {
 | |
|                 index = index_instructions(&c->body, index);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return index;
 | |
| }
 | |
| 
 | |
| /*
 | |
|  * Copy propagation. The basic idea is to recognize instruction sequences of the
 | |
|  * form:
 | |
|  *
 | |
|  *   2: <any instruction>
 | |
|  *   3: v = @2
 | |
|  *   4: load(v)
 | |
|  *
 | |
|  * and replace the load (@4) with the original instruction (@2).
 | |
|  * This works for multiple components, even if they're written using separate
 | |
|  * store instructions, as long as the rhs is the same in every case. This basic
 | |
|  * detection is implemented by copy_propagation_replace_with_single_instr().
 | |
|  *
 | |
|  * In some cases, the load itself might not have a single source, but a
 | |
|  * subsequent swizzle might; hence we also try to replace swizzles of loads.
 | |
|  *
 | |
|  * We use the same infrastructure to implement a more specialized
 | |
|  * transformation. We recognize sequences of the form:
 | |
|  *
 | |
|  *   2: 123
 | |
|  *   3: var.x = @2
 | |
|  *   4: 345
 | |
|  *   5: var.y = @4
 | |
|  *   6: load(var.xy)
 | |
|  *
 | |
|  * where the load (@6) originates from different sources but that are constant,
 | |
|  * and transform it into a single constant vector. This latter pass is done
 | |
|  * by copy_propagation_replace_with_constant_vector().
 | |
|  *
 | |
|  * This is a specialized form of vectorization, and begs the question: why does
 | |
|  * the load need to be involved? Can we just vectorize the stores into a single
 | |
|  * instruction, and then use "normal" copy-prop to convert that into a single
 | |
|  * vector?
 | |
|  *
 | |
|  * In general, the answer is yes, but there is a special case which necessitates
 | |
|  * the use of this transformation: non-uniform control flow. Copy-prop can act
 | |
|  * across some control flow, and in cases like the following:
 | |
|  *
 | |
|  *   2: 123
 | |
|  *   3: var.x = @2
 | |
|  *   4: if (...)
 | |
|  *   5:    456
 | |
|  *   6:    var.y = @5
 | |
|  *   7:    load(var.xy)
 | |
|  *
 | |
|  * we can copy-prop the load (@7) into a constant vector {123, 456}, but we
 | |
|  * cannot easily vectorize the stores @3 and @6.
 | |
|  *
 | |
|  * Moreover, we implement a transformation that propagates loads with a single
 | |
|  * non-constant index in its deref path. Consider a load of the form
 | |
|  * var[[a0][a1]...[i]...[an]], where ak are integral constants, and i is an
 | |
|  * arbitrary non-constant node. If, for all j, the following holds:
 | |
|  *
 | |
|  *   var[[a0][a1]...[j]...[an]] = x[[c0*j + d0][c1*j + d1]...[cm*j + dm]],
 | |
|  *
 | |
|  * where ck, dk are constants, then we can replace the load with
 | |
|  * x[[c0*i + d0]...[cm*i + dm]]. This pass is implemented by
 | |
|  * copy_propagation_replace_with_deref().
 | |
|  */
 | |
| 
 | |
| struct copy_propagation_value
 | |
| {
 | |
|     unsigned int timestamp;
 | |
|     /* If node is NULL, the value was dynamically written and thus, it is unknown.*/
 | |
|     struct hlsl_ir_node *node;
 | |
|     unsigned int component;
 | |
| };
 | |
| 
 | |
| struct copy_propagation_component_trace
 | |
| {
 | |
|     struct copy_propagation_value *records;
 | |
|     size_t record_count, record_capacity;
 | |
| };
 | |
| 
 | |
| struct copy_propagation_var_def
 | |
| {
 | |
|     struct rb_entry entry;
 | |
|     struct hlsl_ir_var *var;
 | |
|     struct copy_propagation_component_trace traces[];
 | |
| };
 | |
| 
 | |
| struct copy_propagation_state
 | |
| {
 | |
|     struct rb_tree *scope_var_defs;
 | |
|     size_t scope_count, scopes_capacity;
 | |
|     struct hlsl_ir_node *stop;
 | |
|     bool stopped;
 | |
| };
 | |
| 
 | |
| static int copy_propagation_var_def_compare(const void *key, const struct rb_entry *entry)
 | |
| {
 | |
|     struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
 | |
|     uintptr_t key_int = (uintptr_t)key, entry_int = (uintptr_t)var_def->var;
 | |
| 
 | |
|     return (key_int > entry_int) - (key_int < entry_int);
 | |
| }
 | |
| 
 | |
| static void copy_propagation_var_def_destroy(struct rb_entry *entry, void *context)
 | |
| {
 | |
|     struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
 | |
|     unsigned int component_count = hlsl_type_component_count(var_def->var->data_type);
 | |
|     unsigned int i;
 | |
| 
 | |
|     for (i = 0; i < component_count; ++i)
 | |
|         vkd3d_free(var_def->traces[i].records);
 | |
|     vkd3d_free(var_def);
 | |
| }
 | |
| 
 | |
| static size_t copy_propagation_push_scope(struct copy_propagation_state *state, struct hlsl_ctx *ctx)
 | |
| {
 | |
|     if (!(hlsl_array_reserve(ctx, (void **)&state->scope_var_defs, &state->scopes_capacity,
 | |
|             state->scope_count + 1, sizeof(*state->scope_var_defs))))
 | |
|         return false;
 | |
| 
 | |
|     rb_init(&state->scope_var_defs[state->scope_count++], copy_propagation_var_def_compare);
 | |
| 
 | |
|     return state->scope_count;
 | |
| }
 | |
| 
 | |
| static size_t copy_propagation_pop_scope(struct copy_propagation_state *state)
 | |
| {
 | |
|     rb_destroy(&state->scope_var_defs[--state->scope_count], copy_propagation_var_def_destroy, NULL);
 | |
| 
 | |
|     return state->scope_count;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_state_init(struct copy_propagation_state *state, struct hlsl_ctx *ctx)
 | |
| {
 | |
|     memset(state, 0, sizeof(*state));
 | |
| 
 | |
|     return copy_propagation_push_scope(state, ctx);
 | |
| }
 | |
| 
 | |
| static void copy_propagation_state_destroy(struct copy_propagation_state *state)
 | |
| {
 | |
|     while (copy_propagation_pop_scope(state));
 | |
| 
 | |
|     vkd3d_free(state->scope_var_defs);
 | |
| }
 | |
| 
 | |
| static struct copy_propagation_value *copy_propagation_get_value_at_time(
 | |
|         struct copy_propagation_component_trace *trace, unsigned int time)
 | |
| {
 | |
|     int r;
 | |
| 
 | |
|     for (r = trace->record_count - 1; r >= 0; --r)
 | |
|     {
 | |
|         if (trace->records[r].timestamp < time)
 | |
|             return &trace->records[r];
 | |
|     }
 | |
| 
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static struct copy_propagation_value *copy_propagation_get_value(const struct copy_propagation_state *state,
 | |
|         const struct hlsl_ir_var *var, unsigned int component, unsigned int time)
 | |
| {
 | |
|     for (size_t i = state->scope_count - 1; i < state->scope_count; i--)
 | |
|     {
 | |
|         struct rb_tree *tree = &state->scope_var_defs[i];
 | |
|         struct rb_entry *entry = rb_get(tree, var);
 | |
|         if (entry)
 | |
|         {
 | |
|             struct copy_propagation_var_def *var_def = RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
 | |
|             unsigned int component_count = hlsl_type_component_count(var->data_type);
 | |
|             struct copy_propagation_value *value;
 | |
| 
 | |
|             VKD3D_ASSERT(component < component_count);
 | |
|             value = copy_propagation_get_value_at_time(&var_def->traces[component], time);
 | |
| 
 | |
|             if (!value)
 | |
|                 continue;
 | |
| 
 | |
|             if (value->node)
 | |
|                 return value;
 | |
|             else
 | |
|                 return NULL;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static struct copy_propagation_var_def *copy_propagation_create_var_def(struct hlsl_ctx *ctx,
 | |
|         struct copy_propagation_state *state, struct hlsl_ir_var *var)
 | |
| {
 | |
|     struct rb_tree *tree = &state->scope_var_defs[state->scope_count - 1];
 | |
|     struct rb_entry *entry = rb_get(tree, var);
 | |
|     struct copy_propagation_var_def *var_def;
 | |
|     unsigned int component_count = hlsl_type_component_count(var->data_type);
 | |
|     int res;
 | |
| 
 | |
|     if (entry)
 | |
|         return RB_ENTRY_VALUE(entry, struct copy_propagation_var_def, entry);
 | |
| 
 | |
|     if (!(var_def = hlsl_alloc(ctx, offsetof(struct copy_propagation_var_def, traces[component_count]))))
 | |
|         return NULL;
 | |
| 
 | |
|     var_def->var = var;
 | |
| 
 | |
|     res = rb_put(tree, var, &var_def->entry);
 | |
|     VKD3D_ASSERT(!res);
 | |
| 
 | |
|     return var_def;
 | |
| }
 | |
| 
 | |
| static void copy_propagation_trace_record_value(struct hlsl_ctx *ctx,
 | |
|         struct copy_propagation_component_trace *trace, struct hlsl_ir_node *node,
 | |
|         unsigned int component, unsigned int time)
 | |
| {
 | |
|     VKD3D_ASSERT(!trace->record_count || trace->records[trace->record_count - 1].timestamp < time);
 | |
| 
 | |
|     if (!hlsl_array_reserve(ctx, (void **)&trace->records, &trace->record_capacity,
 | |
|             trace->record_count + 1, sizeof(trace->records[0])))
 | |
|         return;
 | |
| 
 | |
|     trace->records[trace->record_count].timestamp = time;
 | |
|     trace->records[trace->record_count].node = node;
 | |
|     trace->records[trace->record_count].component = component;
 | |
| 
 | |
|     ++trace->record_count;
 | |
| }
 | |
| 
 | |
| static void copy_propagation_invalidate_variable(struct hlsl_ctx *ctx, struct copy_propagation_var_def *var_def,
 | |
|         unsigned int comp, unsigned char writemask, unsigned int time)
 | |
| {
 | |
|     unsigned i;
 | |
| 
 | |
|     TRACE("Invalidate variable %s[%u]%s.\n", var_def->var->name, comp, debug_hlsl_writemask(writemask));
 | |
| 
 | |
|     for (i = 0; i < 4; ++i)
 | |
|     {
 | |
|         if (writemask & (1u << i))
 | |
|         {
 | |
|             struct copy_propagation_component_trace *trace = &var_def->traces[comp + i];
 | |
| 
 | |
|             /* Don't add an invalidate record if it is already present. */
 | |
|             if (trace->record_count && trace->records[trace->record_count - 1].timestamp == time)
 | |
|             {
 | |
|                 VKD3D_ASSERT(!trace->records[trace->record_count - 1].node);
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             copy_propagation_trace_record_value(ctx, trace, NULL, 0, time);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void copy_propagation_invalidate_variable_from_deref_recurse(struct hlsl_ctx *ctx,
 | |
|         struct copy_propagation_var_def *var_def, const struct hlsl_deref *deref,
 | |
|         struct hlsl_type *type, unsigned int depth, unsigned int comp_start, unsigned char writemask,
 | |
|         unsigned int time)
 | |
| {
 | |
|     unsigned int i, subtype_comp_count;
 | |
|     struct hlsl_ir_node *path_node;
 | |
|     struct hlsl_type *subtype;
 | |
| 
 | |
|     if (depth == deref->path_len)
 | |
|     {
 | |
|         copy_propagation_invalidate_variable(ctx, var_def, comp_start, writemask, time);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     path_node = deref->path[depth].node;
 | |
|     subtype = hlsl_get_element_type_from_path_index(ctx, type, path_node);
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_STRUCT)
 | |
|     {
 | |
|         unsigned int idx = hlsl_ir_constant(path_node)->value.u[0].u;
 | |
| 
 | |
|         for (i = 0; i < idx; ++i)
 | |
|             comp_start += hlsl_type_component_count(type->e.record.fields[i].type);
 | |
| 
 | |
|         copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype,
 | |
|                 depth + 1, comp_start, writemask, time);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         subtype_comp_count = hlsl_type_component_count(subtype);
 | |
| 
 | |
|         if (path_node->type == HLSL_IR_CONSTANT)
 | |
|         {
 | |
|             uint32_t index = hlsl_ir_constant(path_node)->value.u[0].u;
 | |
| 
 | |
|             /* Don't bother invalidating anything if the index is constant but
 | |
|              * out-of-range.
 | |
|              * Such indices are illegal in HLSL, but only if the code is not
 | |
|              * dead, and we can't always know if code is dead without copy-prop
 | |
|              * itself. */
 | |
|             if (index >= hlsl_type_element_count(type))
 | |
|                 return;
 | |
| 
 | |
|             copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype,
 | |
|                     depth + 1, comp_start + index * subtype_comp_count, writemask, time);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             for (i = 0; i < hlsl_type_element_count(type); ++i)
 | |
|             {
 | |
|                 copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, subtype,
 | |
|                         depth + 1, comp_start + i * subtype_comp_count, writemask, time);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void copy_propagation_invalidate_variable_from_deref(struct hlsl_ctx *ctx,
 | |
|         struct copy_propagation_var_def *var_def, const struct hlsl_deref *deref,
 | |
|         unsigned char writemask, unsigned int time)
 | |
| {
 | |
|     copy_propagation_invalidate_variable_from_deref_recurse(ctx, var_def, deref, deref->var->data_type,
 | |
|             0, 0, writemask, time);
 | |
| }
 | |
| 
 | |
| static void copy_propagation_set_value(struct hlsl_ctx *ctx, struct copy_propagation_var_def *var_def,
 | |
|         unsigned int comp, unsigned char writemask, struct hlsl_ir_node *instr, unsigned int time)
 | |
| {
 | |
|     unsigned int i, j = 0;
 | |
| 
 | |
|     for (i = 0; i < 4; ++i)
 | |
|     {
 | |
|         if (writemask & (1u << i))
 | |
|         {
 | |
|             struct copy_propagation_component_trace *trace = &var_def->traces[comp + i];
 | |
| 
 | |
|             TRACE("Variable %s[%u] is written by instruction %p%s.\n",
 | |
|                     var_def->var->name, comp + i, instr, debug_hlsl_writemask(1u << i));
 | |
| 
 | |
|             copy_propagation_trace_record_value(ctx, trace, instr, j++, time);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_replace_with_single_instr(struct hlsl_ctx *ctx,
 | |
|         const struct copy_propagation_state *state, const struct hlsl_ir_load *load,
 | |
|         uint32_t swizzle, struct hlsl_ir_node *instr)
 | |
| {
 | |
|     const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type);
 | |
|     const struct hlsl_deref *deref = &load->src;
 | |
|     const struct hlsl_ir_var *var = deref->var;
 | |
|     struct hlsl_ir_node *new_instr = NULL;
 | |
|     unsigned int time = load->node.index;
 | |
|     unsigned int start, count, i;
 | |
|     uint32_t ret_swizzle = 0;
 | |
| 
 | |
|     if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count))
 | |
|         return false;
 | |
| 
 | |
|     for (i = 0; i < instr_component_count; ++i)
 | |
|     {
 | |
|         struct copy_propagation_value *value;
 | |
| 
 | |
|         if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i),
 | |
|                 time)))
 | |
|             return false;
 | |
| 
 | |
|         if (!new_instr)
 | |
|         {
 | |
|             new_instr = value->node;
 | |
|         }
 | |
|         else if (new_instr != value->node)
 | |
|         {
 | |
|             TRACE("No single source for propagating load from %s[%u-%u]%s\n",
 | |
|                     var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count));
 | |
|             return false;
 | |
|         }
 | |
|         hlsl_swizzle_set_component(&ret_swizzle, i, value->component);
 | |
|     }
 | |
| 
 | |
|     TRACE("Load from %s[%u-%u]%s propagated as instruction %p%s.\n",
 | |
|             var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count),
 | |
|             new_instr, debug_hlsl_swizzle(ret_swizzle, instr_component_count));
 | |
| 
 | |
|     if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR)
 | |
|     {
 | |
|         struct hlsl_ir_node *swizzle_node;
 | |
| 
 | |
|         if (!(swizzle_node = hlsl_new_swizzle(ctx, ret_swizzle, instr_component_count, new_instr, &instr->loc)))
 | |
|             return false;
 | |
|         list_add_before(&instr->entry, &swizzle_node->entry);
 | |
|         new_instr = swizzle_node;
 | |
|     }
 | |
| 
 | |
|     hlsl_replace_node(instr, new_instr);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_replace_with_constant_vector(struct hlsl_ctx *ctx,
 | |
|         const struct copy_propagation_state *state, const struct hlsl_ir_load *load,
 | |
|         uint32_t swizzle, struct hlsl_ir_node *instr)
 | |
| {
 | |
|     const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type);
 | |
|     const struct hlsl_deref *deref = &load->src;
 | |
|     const struct hlsl_ir_var *var = deref->var;
 | |
|     struct hlsl_constant_value values = {0};
 | |
|     unsigned int time = load->node.index;
 | |
|     unsigned int start, count, i;
 | |
|     struct hlsl_ir_node *cons;
 | |
| 
 | |
|     if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count))
 | |
|         return false;
 | |
| 
 | |
|     for (i = 0; i < instr_component_count; ++i)
 | |
|     {
 | |
|         struct copy_propagation_value *value;
 | |
| 
 | |
|         if (!(value = copy_propagation_get_value(state, var, start + hlsl_swizzle_get_component(swizzle, i),
 | |
|                 time)) || value->node->type != HLSL_IR_CONSTANT)
 | |
|             return false;
 | |
| 
 | |
|         values.u[i] = hlsl_ir_constant(value->node)->value.u[value->component];
 | |
|     }
 | |
| 
 | |
|     if (!(cons = hlsl_new_constant(ctx, instr->data_type, &values, &instr->loc)))
 | |
|         return false;
 | |
|     list_add_before(&instr->entry, &cons->entry);
 | |
| 
 | |
|     TRACE("Load from %s[%u-%u]%s turned into a constant %p.\n",
 | |
|             var->name, start, start + count, debug_hlsl_swizzle(swizzle, instr_component_count), cons);
 | |
| 
 | |
|     hlsl_replace_node(instr, cons);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool component_index_from_deref_path_node(struct hlsl_ir_node *path_node,
 | |
|         struct hlsl_type *type, unsigned int *index)
 | |
| {
 | |
|     unsigned int idx, i;
 | |
| 
 | |
|     if (path_node->type != HLSL_IR_CONSTANT)
 | |
|         return false;
 | |
| 
 | |
|     idx = hlsl_ir_constant(path_node)->value.u[0].u;
 | |
|     *index = 0;
 | |
| 
 | |
|     switch (type->class)
 | |
|     {
 | |
|         case HLSL_CLASS_VECTOR:
 | |
|             if (idx >= type->e.numeric.dimx)
 | |
|                 return false;
 | |
|             *index = idx;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_MATRIX:
 | |
|             if (idx >= hlsl_type_major_size(type))
 | |
|                 return false;
 | |
|             if (hlsl_type_is_row_major(type))
 | |
|                 *index = idx * type->e.numeric.dimx;
 | |
|             else
 | |
|                 *index = idx * type->e.numeric.dimy;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_ARRAY:
 | |
|             if (idx >= type->e.array.elements_count)
 | |
|                 return false;
 | |
|             *index = idx * hlsl_type_component_count(type->e.array.type);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_STRUCT:
 | |
|             for (i = 0; i < idx; ++i)
 | |
|                 *index += hlsl_type_component_count(type->e.record.fields[i].type);
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool nonconst_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
 | |
|         unsigned int *idx, unsigned int *base, unsigned int *scale, unsigned int *count)
 | |
| {
 | |
|     struct hlsl_type *type = deref->var->data_type;
 | |
|     bool found = false;
 | |
|     unsigned int i;
 | |
| 
 | |
|     *base = 0;
 | |
| 
 | |
|     for (i = 0; i < deref->path_len; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *path_node = deref->path[i].node;
 | |
|         struct hlsl_type *next_type;
 | |
| 
 | |
|         VKD3D_ASSERT(path_node);
 | |
| 
 | |
|         /* We should always have generated a cast to UINT. */
 | |
|         VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT);
 | |
| 
 | |
|         next_type = hlsl_get_element_type_from_path_index(ctx, type, path_node);
 | |
| 
 | |
|         if (path_node->type != HLSL_IR_CONSTANT)
 | |
|         {
 | |
|             if (found)
 | |
|                 return false;
 | |
|             found = true;
 | |
|             *idx = i;
 | |
|             *scale = hlsl_type_component_count(next_type);
 | |
|             *count = hlsl_type_element_count(type);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             unsigned int index;
 | |
| 
 | |
|             if (!component_index_from_deref_path_node(path_node, type, &index))
 | |
|                 return false;
 | |
|             *base += index;
 | |
|         }
 | |
| 
 | |
|         type = next_type;
 | |
|     }
 | |
| 
 | |
|     return found;
 | |
| }
 | |
| 
 | |
| static struct hlsl_ir_node *new_affine_path_index(struct hlsl_ctx *ctx, const struct vkd3d_shader_location *loc,
 | |
|         struct hlsl_block *block, struct hlsl_ir_node *index, int c, int d)
 | |
| {
 | |
|     struct hlsl_ir_node *c_node, *d_node, *ic, *idx;
 | |
|     bool use_uint = c >= 0 && d >= 0;
 | |
| 
 | |
|     if (!c)
 | |
|     {
 | |
|         VKD3D_ASSERT(d >= 0);
 | |
| 
 | |
|         return hlsl_block_add_uint_constant(ctx, block, d, loc);
 | |
|     }
 | |
| 
 | |
|     if (use_uint)
 | |
|     {
 | |
|         c_node = hlsl_block_add_uint_constant(ctx, block, c, loc);
 | |
|         d_node = hlsl_block_add_uint_constant(ctx, block, d, loc);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         c_node = hlsl_block_add_int_constant(ctx, block, c, loc);
 | |
|         d_node = hlsl_block_add_int_constant(ctx, block, d, loc);
 | |
|         index = hlsl_block_add_cast(ctx, block, index, hlsl_get_scalar_type(ctx, HLSL_TYPE_INT), loc);
 | |
|     }
 | |
| 
 | |
|     ic = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, index, c_node);
 | |
|     idx = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, ic, d_node);
 | |
|     if (!use_uint)
 | |
|         idx = hlsl_block_add_cast(ctx, block, idx, hlsl_get_scalar_type(ctx, HLSL_TYPE_UINT), loc);
 | |
| 
 | |
|     return idx;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_replace_with_deref(struct hlsl_ctx *ctx,
 | |
|         const struct copy_propagation_state *state, const struct hlsl_ir_load *load,
 | |
|         uint32_t swizzle, struct hlsl_ir_node *instr)
 | |
| {
 | |
|     const unsigned int instr_component_count = hlsl_type_component_count(instr->data_type);
 | |
|     unsigned int nonconst_i = 0, base, scale, count;
 | |
|     struct hlsl_ir_node *index, *new_instr = NULL;
 | |
|     const struct hlsl_deref *deref = &load->src;
 | |
|     const struct hlsl_ir_var *var = deref->var;
 | |
|     unsigned int time = load->node.index;
 | |
|     struct hlsl_deref tmp_deref = {0};
 | |
|     struct hlsl_ir_load *new_load;
 | |
|     struct hlsl_ir_var *x = NULL;
 | |
|     int *c = NULL, *d = NULL;
 | |
|     uint32_t ret_swizzle = 0;
 | |
|     struct hlsl_block block;
 | |
|     unsigned int path_len;
 | |
|     bool success = false;
 | |
|     int i, j, k;
 | |
| 
 | |
|     if (!nonconst_index_from_deref(ctx, deref, &nonconst_i, &base, &scale, &count))
 | |
|         return false;
 | |
| 
 | |
|     VKD3D_ASSERT(count);
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
| 
 | |
|     index = deref->path[nonconst_i].node;
 | |
| 
 | |
|     /* Iterate over the nonconst index, and check if their values all have the form
 | |
|      * x[[c0*i + d0][c1*i + d1]...[cm*i + dm]], and determine the constants c, d. */
 | |
|     for (i = 0; i < count; ++i)
 | |
|     {
 | |
|         unsigned int start = base + scale * i;
 | |
|         struct copy_propagation_value *value;
 | |
|         struct hlsl_ir_load *idx;
 | |
|         uint32_t cur_swizzle = 0;
 | |
| 
 | |
|         if (!(value = copy_propagation_get_value(state, var,
 | |
|                 start + hlsl_swizzle_get_component(swizzle, 0), time)))
 | |
|             goto done;
 | |
| 
 | |
|         if (value->node->type != HLSL_IR_LOAD)
 | |
|             goto done;
 | |
|         idx = hlsl_ir_load(value->node);
 | |
| 
 | |
|         if (!x)
 | |
|             x = idx->src.var;
 | |
|         else if (x != idx->src.var)
 | |
|             goto done;
 | |
| 
 | |
|         if (hlsl_version_lt(ctx, 4, 0) && x->is_uniform && ctx->profile->type != VKD3D_SHADER_TYPE_VERTEX)
 | |
|         {
 | |
|             TRACE("Skipping propagating non-constant deref to SM1 uniform %s.\n", var->name);
 | |
|             goto done;
 | |
|         }
 | |
| 
 | |
|         if (i == 0)
 | |
|         {
 | |
|             path_len = idx->src.path_len;
 | |
| 
 | |
|             if (path_len)
 | |
|             {
 | |
|                 if (!(c = hlsl_calloc(ctx, path_len, sizeof(c[0])))
 | |
|                         || !(d = hlsl_alloc(ctx, path_len * sizeof(d[0]))))
 | |
|                     goto done;
 | |
|             }
 | |
| 
 | |
|             for (k = 0; k < path_len; ++k)
 | |
|             {
 | |
|                 if (idx->src.path[k].node->type != HLSL_IR_CONSTANT)
 | |
|                     goto done;
 | |
|                 d[k] = hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u;
 | |
|             }
 | |
| 
 | |
|         }
 | |
|         else if (i == 1)
 | |
|         {
 | |
|             struct hlsl_type *type = idx->src.var->data_type;
 | |
| 
 | |
|             if (idx->src.path_len != path_len)
 | |
|                 goto done;
 | |
| 
 | |
|             /* Calculate constants c and d based on the first two path indices. */
 | |
|             for (k = 0; k < path_len; ++k)
 | |
|             {
 | |
|                 int ix;
 | |
| 
 | |
|                 if (idx->src.path[k].node->type != HLSL_IR_CONSTANT)
 | |
|                     goto done;
 | |
|                 ix = hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u;
 | |
|                 c[k] = ix - d[k];
 | |
|                 d[k] = ix - c[k] * i;
 | |
| 
 | |
|                 if (c[k] && type->class == HLSL_CLASS_STRUCT)
 | |
|                     goto done;
 | |
| 
 | |
|                 type = hlsl_get_element_type_from_path_index(ctx, type, idx->src.path[k].node);
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             if (idx->src.path_len != path_len)
 | |
|                 goto done;
 | |
| 
 | |
|             /* Check that this load has the form x[[c0*i +d0][c1*i + d1]...[cm*i + dm]]. */
 | |
|             for (k = 0; k < path_len; ++k)
 | |
|             {
 | |
|                 if (idx->src.path[k].node->type != HLSL_IR_CONSTANT)
 | |
|                     goto done;
 | |
|                 if (hlsl_ir_constant(idx->src.path[k].node)->value.u[0].u != c[k] * i + d[k])
 | |
|                     goto done;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         hlsl_swizzle_set_component(&cur_swizzle, 0, value->component);
 | |
| 
 | |
|         for (j = 1; j < instr_component_count; ++j)
 | |
|         {
 | |
|             struct copy_propagation_value *val;
 | |
| 
 | |
|             if (!(val = copy_propagation_get_value(state, var,
 | |
|                     start + hlsl_swizzle_get_component(swizzle, j), time)))
 | |
|                 goto done;
 | |
|             if (val->node != &idx->node)
 | |
|                 goto done;
 | |
| 
 | |
|             hlsl_swizzle_set_component(&cur_swizzle, j, val->component);
 | |
|         }
 | |
| 
 | |
|         if (i == 0)
 | |
|             ret_swizzle = cur_swizzle;
 | |
|         else if (ret_swizzle != cur_swizzle)
 | |
|             goto done;
 | |
|     }
 | |
| 
 | |
|     if (!hlsl_init_deref(ctx, &tmp_deref, x, path_len))
 | |
|         goto done;
 | |
| 
 | |
|     for (k = 0; k < path_len; ++k)
 | |
|     {
 | |
|         hlsl_src_from_node(&tmp_deref.path[k],
 | |
|                 new_affine_path_index(ctx, &load->node.loc, &block, index, c[k], d[k]));
 | |
|     }
 | |
| 
 | |
|     if (!(new_load = hlsl_new_load_index(ctx, &tmp_deref, NULL, &load->node.loc)))
 | |
|         goto done;
 | |
|     new_instr = &new_load->node;
 | |
|     hlsl_block_add_instr(&block, new_instr);
 | |
| 
 | |
|     if (new_instr->data_type->class == HLSL_CLASS_SCALAR || new_instr->data_type->class == HLSL_CLASS_VECTOR)
 | |
|         new_instr = hlsl_block_add_swizzle(ctx, &block, ret_swizzle, instr_component_count, new_instr, &instr->loc);
 | |
| 
 | |
|     if (TRACE_ON())
 | |
|     {
 | |
|         struct vkd3d_string_buffer buffer;
 | |
| 
 | |
|         vkd3d_string_buffer_init(&buffer);
 | |
| 
 | |
|         vkd3d_string_buffer_printf(&buffer, "Load from %s[", var->name);
 | |
|         for (j = 0; j < deref->path_len; ++j)
 | |
|         {
 | |
|             if (j == nonconst_i)
 | |
|                 vkd3d_string_buffer_printf(&buffer, "[i]");
 | |
|             else
 | |
|                 vkd3d_string_buffer_printf(&buffer, "[%u]", hlsl_ir_constant(deref->path[j].node)->value.u[0].u);
 | |
|         }
 | |
|         vkd3d_string_buffer_printf(&buffer, "]%s propagated as %s[",
 | |
|                 debug_hlsl_swizzle(swizzle, instr_component_count), tmp_deref.var->name);
 | |
|         for (k = 0; k < path_len; ++k)
 | |
|         {
 | |
|             if (c[k])
 | |
|                 vkd3d_string_buffer_printf(&buffer, "[i*%d + %d]", c[k], d[k]);
 | |
|             else
 | |
|                 vkd3d_string_buffer_printf(&buffer, "[%d]", d[k]);
 | |
|         }
 | |
|         vkd3d_string_buffer_printf(&buffer, "]%s (i = %p).\n",
 | |
|                 debug_hlsl_swizzle(ret_swizzle, instr_component_count), index);
 | |
| 
 | |
|         vkd3d_string_buffer_trace(&buffer);
 | |
|         vkd3d_string_buffer_cleanup(&buffer);
 | |
|     }
 | |
| 
 | |
|     list_move_before(&instr->entry, &block.instrs);
 | |
|     hlsl_replace_node(instr, new_instr);
 | |
|     success = true;
 | |
| 
 | |
| done:
 | |
|     hlsl_cleanup_deref(&tmp_deref);
 | |
|     hlsl_block_cleanup(&block);
 | |
|     vkd3d_free(c);
 | |
|     vkd3d_free(d);
 | |
|     return success;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_load(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_load *load, struct copy_propagation_state *state)
 | |
| {
 | |
|     struct hlsl_type *type = load->node.data_type;
 | |
| 
 | |
|     switch (type->class)
 | |
|     {
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_STATE:
 | |
|         case HLSL_CLASS_SCALAR:
 | |
|         case HLSL_CLASS_VECTOR:
 | |
|         case HLSL_CLASS_PIXEL_SHADER:
 | |
|         case HLSL_CLASS_RASTERIZER_STATE:
 | |
|         case HLSL_CLASS_SAMPLER:
 | |
|         case HLSL_CLASS_STRING:
 | |
|         case HLSL_CLASS_TEXTURE:
 | |
|         case HLSL_CLASS_UAV:
 | |
|         case HLSL_CLASS_VERTEX_SHADER:
 | |
|         case HLSL_CLASS_COMPUTE_SHADER:
 | |
|         case HLSL_CLASS_DOMAIN_SHADER:
 | |
|         case HLSL_CLASS_HULL_SHADER:
 | |
|         case HLSL_CLASS_RENDER_TARGET_VIEW:
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_VIEW:
 | |
|         case HLSL_CLASS_GEOMETRY_SHADER:
 | |
|         case HLSL_CLASS_BLEND_STATE:
 | |
|         case HLSL_CLASS_STREAM_OUTPUT:
 | |
|         case HLSL_CLASS_NULL:
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_MATRIX:
 | |
|         case HLSL_CLASS_ARRAY:
 | |
|         case HLSL_CLASS_STRUCT:
 | |
|             /* We can't handle complex types here.
 | |
|              * They should have been already split anyway by earlier passes,
 | |
|              * but they may not have been deleted yet. We can't rely on DCE to
 | |
|              * solve that problem for us, since we may be called on a partial
 | |
|              * block, but DCE deletes dead stores, so it needs to be able to
 | |
|              * see the whole program. */
 | |
|         case HLSL_CLASS_ERROR:
 | |
|             return false;
 | |
| 
 | |
|         case HLSL_CLASS_CONSTANT_BUFFER:
 | |
|         case HLSL_CLASS_EFFECT_GROUP:
 | |
|         case HLSL_CLASS_PASS:
 | |
|         case HLSL_CLASS_TECHNIQUE:
 | |
|         case HLSL_CLASS_VOID:
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
| 
 | |
|     if (copy_propagation_replace_with_constant_vector(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node))
 | |
|         return true;
 | |
| 
 | |
|     if (copy_propagation_replace_with_single_instr(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node))
 | |
|         return true;
 | |
| 
 | |
|     if (copy_propagation_replace_with_deref(ctx, state, load, HLSL_SWIZZLE(X, Y, Z, W), &load->node))
 | |
|         return true;
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_swizzle(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_swizzle *swizzle, struct copy_propagation_state *state)
 | |
| {
 | |
|     struct hlsl_ir_load *load;
 | |
| 
 | |
|     if (swizzle->val.node->type != HLSL_IR_LOAD)
 | |
|         return false;
 | |
|     load = hlsl_ir_load(swizzle->val.node);
 | |
| 
 | |
|     if (copy_propagation_replace_with_constant_vector(ctx, state, load, swizzle->u.vector, &swizzle->node))
 | |
|         return true;
 | |
| 
 | |
|     if (copy_propagation_replace_with_single_instr(ctx, state, load, swizzle->u.vector, &swizzle->node))
 | |
|         return true;
 | |
| 
 | |
|     if (copy_propagation_replace_with_deref(ctx, state, load, swizzle->u.vector, &swizzle->node))
 | |
|         return true;
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_object_load(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_deref *deref, struct copy_propagation_state *state, unsigned int time)
 | |
| {
 | |
|     struct copy_propagation_value *value;
 | |
|     struct hlsl_ir_load *load;
 | |
|     unsigned int start, count;
 | |
| 
 | |
|     if (!hlsl_component_index_range_from_deref(ctx, deref, &start, &count))
 | |
|         return false;
 | |
|     VKD3D_ASSERT(count == 1);
 | |
| 
 | |
|     if (!(value = copy_propagation_get_value(state, deref->var, start, time)))
 | |
|         return false;
 | |
|     VKD3D_ASSERT(value->component == 0);
 | |
| 
 | |
|     /* A uniform object should have never been written to. */
 | |
|     VKD3D_ASSERT(!deref->var->is_uniform);
 | |
| 
 | |
|     /* Only HLSL_IR_LOAD can produce an object. */
 | |
|     load = hlsl_ir_load(value->node);
 | |
| 
 | |
|     /* As we are replacing the instruction's deref (with the one in the hlsl_ir_load) and not the
 | |
|      * instruction itself, we won't be able to rely on the value retrieved by
 | |
|      * copy_propagation_get_value() for the new deref in subsequent iterations of copy propagation.
 | |
|      * This is because another value may be written to that deref between the hlsl_ir_load and
 | |
|      * this instruction.
 | |
|      *
 | |
|      * For this reason, we only replace the new deref when it corresponds to a uniform variable,
 | |
|      * which cannot be written to.
 | |
|      *
 | |
|      * In a valid shader, all object references must resolve statically to a single uniform object.
 | |
|      * If this is the case, we can expect copy propagation on regular store/loads and the other
 | |
|      * compilation passes to replace all hlsl_ir_loads with loads to uniform objects, so this
 | |
|      * implementation is complete, even with this restriction.
 | |
|      */
 | |
|     if (!load->src.var->is_uniform)
 | |
|     {
 | |
|         TRACE("Ignoring load from non-uniform object variable %s\n", load->src.var->name);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     hlsl_cleanup_deref(deref);
 | |
|     hlsl_copy_deref(ctx, deref, &load->src);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_resource_load(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_resource_load *load, struct copy_propagation_state *state)
 | |
| {
 | |
|     bool progress = false;
 | |
| 
 | |
|     progress |= copy_propagation_transform_object_load(ctx, &load->resource, state, load->node.index);
 | |
|     if (load->sampler.var)
 | |
|         progress |= copy_propagation_transform_object_load(ctx, &load->sampler, state, load->node.index);
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_resource_store(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_resource_store *store, struct copy_propagation_state *state)
 | |
| {
 | |
|     bool progress = false;
 | |
| 
 | |
|     progress |= copy_propagation_transform_object_load(ctx, &store->resource, state, store->node.index);
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_interlocked(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_interlocked *interlocked, struct copy_propagation_state *state)
 | |
| {
 | |
|     bool progress = false;
 | |
| 
 | |
|     progress |= copy_propagation_transform_object_load(ctx, &interlocked->dst, state, interlocked->node.index);
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| static void copy_propagation_record_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
 | |
|         struct copy_propagation_state *state)
 | |
| {
 | |
|     struct copy_propagation_var_def *var_def;
 | |
|     struct hlsl_deref *lhs = &store->lhs;
 | |
|     struct hlsl_ir_var *var = lhs->var;
 | |
|     unsigned int start, count;
 | |
| 
 | |
|     if (!(var_def = copy_propagation_create_var_def(ctx, state, var)))
 | |
|         return;
 | |
| 
 | |
|     if (hlsl_component_index_range_from_deref(ctx, lhs, &start, &count))
 | |
|     {
 | |
|         unsigned int writemask = store->writemask;
 | |
| 
 | |
|         if (!hlsl_is_numeric_type(store->rhs.node->data_type))
 | |
|             writemask = VKD3DSP_WRITEMASK_0;
 | |
|         copy_propagation_set_value(ctx, var_def, start, writemask, store->rhs.node, store->node.index);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         copy_propagation_invalidate_variable_from_deref(ctx, var_def, lhs, store->writemask,
 | |
|                 store->node.index);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void copy_propagation_invalidate_from_block(struct hlsl_ctx *ctx, struct copy_propagation_state *state,
 | |
|         struct hlsl_block *block, unsigned int time)
 | |
| {
 | |
|     struct hlsl_ir_node *instr;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         switch (instr->type)
 | |
|         {
 | |
|             case HLSL_IR_STORE:
 | |
|             {
 | |
|                 struct hlsl_ir_store *store = hlsl_ir_store(instr);
 | |
|                 struct copy_propagation_var_def *var_def;
 | |
|                 struct hlsl_deref *lhs = &store->lhs;
 | |
|                 struct hlsl_ir_var *var = lhs->var;
 | |
| 
 | |
|                 if (!(var_def = copy_propagation_create_var_def(ctx, state, var)))
 | |
|                     continue;
 | |
| 
 | |
|                 copy_propagation_invalidate_variable_from_deref(ctx, var_def, lhs, store->writemask, time);
 | |
| 
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_IF:
 | |
|             {
 | |
|                 struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
| 
 | |
|                 copy_propagation_invalidate_from_block(ctx, state, &iff->then_block, time);
 | |
|                 copy_propagation_invalidate_from_block(ctx, state, &iff->else_block, time);
 | |
| 
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_LOOP:
 | |
|             {
 | |
|                 struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
 | |
| 
 | |
|                 copy_propagation_invalidate_from_block(ctx, state, &loop->body, time);
 | |
| 
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_SWITCH:
 | |
|             {
 | |
|                 struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|                 struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|                 LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|                 {
 | |
|                     copy_propagation_invalidate_from_block(ctx, state, &c->body, time);
 | |
|                 }
 | |
| 
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct copy_propagation_state *state);
 | |
| 
 | |
| static bool copy_propagation_process_if(struct hlsl_ctx *ctx, struct hlsl_ir_if *iff,
 | |
|         struct copy_propagation_state *state)
 | |
| {
 | |
|     bool progress = false;
 | |
| 
 | |
|     copy_propagation_push_scope(state, ctx);
 | |
|     progress |= copy_propagation_transform_block(ctx, &iff->then_block, state);
 | |
|     if (state->stopped)
 | |
|         return progress;
 | |
|     copy_propagation_pop_scope(state);
 | |
| 
 | |
|     copy_propagation_push_scope(state, ctx);
 | |
|     progress |= copy_propagation_transform_block(ctx, &iff->else_block, state);
 | |
|     if (state->stopped)
 | |
|         return progress;
 | |
|     copy_propagation_pop_scope(state);
 | |
| 
 | |
|     /* Ideally we'd invalidate the outer state looking at what was
 | |
|      * touched in the two inner states, but this doesn't work for
 | |
|      * loops (because we need to know what is invalidated in advance),
 | |
|      * so we need copy_propagation_invalidate_from_block() anyway. */
 | |
|     copy_propagation_invalidate_from_block(ctx, state, &iff->then_block, iff->node.index);
 | |
|     copy_propagation_invalidate_from_block(ctx, state, &iff->else_block, iff->node.index);
 | |
| 
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_process_loop(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop,
 | |
|         struct copy_propagation_state *state)
 | |
| {
 | |
|     bool progress = false;
 | |
| 
 | |
|     copy_propagation_invalidate_from_block(ctx, state, &loop->body, loop->node.index);
 | |
|     copy_propagation_invalidate_from_block(ctx, state, &loop->iter, loop->node.index);
 | |
| 
 | |
|     copy_propagation_push_scope(state, ctx);
 | |
|     progress |= copy_propagation_transform_block(ctx, &loop->body, state);
 | |
|     if (state->stopped)
 | |
|         return progress;
 | |
|     copy_propagation_pop_scope(state);
 | |
| 
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_process_switch(struct hlsl_ctx *ctx, struct hlsl_ir_switch *s,
 | |
|         struct copy_propagation_state *state)
 | |
| {
 | |
|     struct hlsl_ir_switch_case *c;
 | |
|     bool progress = false;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|     {
 | |
|         copy_propagation_push_scope(state, ctx);
 | |
|         progress |= copy_propagation_transform_block(ctx, &c->body, state);
 | |
|         if (state->stopped)
 | |
|             return progress;
 | |
|         copy_propagation_pop_scope(state);
 | |
|     }
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|     {
 | |
|         copy_propagation_invalidate_from_block(ctx, state, &c->body, s->node.index);
 | |
|     }
 | |
| 
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| static bool copy_propagation_transform_block(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct copy_propagation_state *state)
 | |
| {
 | |
|     struct hlsl_ir_node *instr, *next;
 | |
|     bool progress = false;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr == state->stop)
 | |
|         {
 | |
|             state->stopped = true;
 | |
|             return progress;
 | |
|         }
 | |
| 
 | |
|         switch (instr->type)
 | |
|         {
 | |
|             case HLSL_IR_LOAD:
 | |
|                 progress |= copy_propagation_transform_load(ctx, hlsl_ir_load(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_RESOURCE_LOAD:
 | |
|                 progress |= copy_propagation_transform_resource_load(ctx, hlsl_ir_resource_load(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_RESOURCE_STORE:
 | |
|                 progress |= copy_propagation_transform_resource_store(ctx, hlsl_ir_resource_store(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_STORE:
 | |
|                 copy_propagation_record_store(ctx, hlsl_ir_store(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_SWIZZLE:
 | |
|                 progress |= copy_propagation_transform_swizzle(ctx, hlsl_ir_swizzle(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_IF:
 | |
|                 progress |= copy_propagation_process_if(ctx, hlsl_ir_if(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_LOOP:
 | |
|                 progress |= copy_propagation_process_loop(ctx, hlsl_ir_loop(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_SWITCH:
 | |
|                 progress |= copy_propagation_process_switch(ctx, hlsl_ir_switch(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_INTERLOCKED:
 | |
|                 progress |= copy_propagation_transform_interlocked(ctx, hlsl_ir_interlocked(instr), state);
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
| 
 | |
|         if (state->stopped)
 | |
|             return progress;
 | |
|     }
 | |
| 
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| bool hlsl_copy_propagation_execute(struct hlsl_ctx *ctx, struct hlsl_block *block)
 | |
| {
 | |
|     struct copy_propagation_state state;
 | |
|     bool progress;
 | |
| 
 | |
|     if (ctx->result)
 | |
|         return false;
 | |
| 
 | |
|     index_instructions(block, 1);
 | |
| 
 | |
|     copy_propagation_state_init(&state, ctx);
 | |
| 
 | |
|     progress = copy_propagation_transform_block(ctx, block, &state);
 | |
| 
 | |
|     copy_propagation_state_destroy(&state);
 | |
| 
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| enum validation_result
 | |
| {
 | |
|     DEREF_VALIDATION_OK,
 | |
|     DEREF_VALIDATION_OUT_OF_BOUNDS,
 | |
|     DEREF_VALIDATION_NOT_CONSTANT,
 | |
| };
 | |
| 
 | |
| struct vectorize_exprs_state
 | |
| {
 | |
|     struct vectorizable_exprs_group
 | |
|     {
 | |
|         struct hlsl_block *block;
 | |
|         struct hlsl_ir_expr *exprs[4];
 | |
|         uint8_t expr_count, component_count;
 | |
|     } *groups;
 | |
|     size_t count, capacity;
 | |
| };
 | |
| 
 | |
| static bool is_same_vectorizable_source(struct hlsl_ir_node *a, struct hlsl_ir_node *b)
 | |
| {
 | |
|     /* TODO: We can also vectorize different constants. */
 | |
| 
 | |
|     if (a->type == HLSL_IR_SWIZZLE)
 | |
|         a = hlsl_ir_swizzle(a)->val.node;
 | |
|     if (b->type == HLSL_IR_SWIZZLE)
 | |
|         b = hlsl_ir_swizzle(b)->val.node;
 | |
| 
 | |
|     return a == b;
 | |
| }
 | |
| 
 | |
| static bool is_same_vectorizable_expr(struct hlsl_ir_expr *a, struct hlsl_ir_expr *b)
 | |
| {
 | |
|     if (a->op != b->op)
 | |
|         return false;
 | |
| 
 | |
|     for (size_t j = 0; j < HLSL_MAX_OPERANDS; ++j)
 | |
|     {
 | |
|         if (!a->operands[j].node)
 | |
|             break;
 | |
|         if (!is_same_vectorizable_source(a->operands[j].node, b->operands[j].node))
 | |
|             return false;
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void record_vectorizable_expr(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_expr *expr, struct vectorize_exprs_state *state)
 | |
| {
 | |
|     if (expr->node.data_type->class > HLSL_CLASS_VECTOR)
 | |
|         return;
 | |
| 
 | |
|     /* These are the only current ops that are not per-component. */
 | |
|     if (expr->op == HLSL_OP1_COS_REDUCED || expr->op == HLSL_OP1_SIN_REDUCED
 | |
|             || expr->op == HLSL_OP2_DOT || expr->op == HLSL_OP3_DP2ADD)
 | |
|         return;
 | |
| 
 | |
|     for (size_t i = 0; i < state->count; ++i)
 | |
|     {
 | |
|         struct vectorizable_exprs_group *group = &state->groups[i];
 | |
|         struct hlsl_ir_expr *other = group->exprs[0];
 | |
| 
 | |
|         /* These are SSA instructions, which means they have the same value
 | |
|          * regardless of what block they're in. However, being in different
 | |
|          * blocks may mean that one expression or the other is not always
 | |
|          * executed. */
 | |
| 
 | |
|         if (expr->node.data_type->e.numeric.dimx + group->component_count <= 4
 | |
|                 && group->block == block
 | |
|                 && is_same_vectorizable_expr(expr, other))
 | |
|         {
 | |
|             group->exprs[group->expr_count++] = expr;
 | |
|             group->component_count += expr->node.data_type->e.numeric.dimx;
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (!hlsl_array_reserve(ctx, (void **)&state->groups,
 | |
|             &state->capacity, state->count + 1, sizeof(*state->groups)))
 | |
|         return;
 | |
|     state->groups[state->count].block = block;
 | |
|     state->groups[state->count].exprs[0] = expr;
 | |
|     state->groups[state->count].expr_count = 1;
 | |
|     state->groups[state->count].component_count = expr->node.data_type->e.numeric.dimx;
 | |
|     ++state->count;
 | |
| }
 | |
| 
 | |
| static void find_vectorizable_expr_groups(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct vectorize_exprs_state *state)
 | |
| {
 | |
|     struct hlsl_ir_node *instr;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->type == HLSL_IR_EXPR)
 | |
|         {
 | |
|             record_vectorizable_expr(ctx, block, hlsl_ir_expr(instr), state);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_IF)
 | |
|         {
 | |
|             struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
| 
 | |
|             find_vectorizable_expr_groups(ctx, &iff->then_block, state);
 | |
|             find_vectorizable_expr_groups(ctx, &iff->else_block, state);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_LOOP)
 | |
|         {
 | |
|             find_vectorizable_expr_groups(ctx, &hlsl_ir_loop(instr)->body, state);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_SWITCH)
 | |
|         {
 | |
|             struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|             struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|             LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|                 find_vectorizable_expr_groups(ctx, &c->body, state);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Combine sequences like
 | |
|  *
 | |
|  * 3: @1.x
 | |
|  * 4: @2.x
 | |
|  * 5: @3 * @4
 | |
|  * 6: @1.y
 | |
|  * 7: @2.x
 | |
|  * 8: @6 * @7
 | |
|  *
 | |
|  * into
 | |
|  *
 | |
|  * 5_1: @1.xy
 | |
|  * 5_2: @2.xx
 | |
|  * 5_3: @5_1 * @5_2
 | |
|  * 5:   @5_3.x
 | |
|  * 8:   @5_3.y
 | |
|  *
 | |
|  * Each operand to an expression needs to refer to the same ultimate source
 | |
|  * (in this case @1 and @2 respectively), but can be a swizzle thereof.
 | |
|  *
 | |
|  * In practice the swizzles @5 and @8 can generally then be vectorized again,
 | |
|  * either as part of another expression, or as part of a store.
 | |
|  */
 | |
| static bool vectorize_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block)
 | |
| {
 | |
|     struct vectorize_exprs_state state = {0};
 | |
|     bool progress = false;
 | |
| 
 | |
|     find_vectorizable_expr_groups(ctx, block, &state);
 | |
| 
 | |
|     for (unsigned int i = 0; i < state.count; ++i)
 | |
|     {
 | |
|         struct vectorizable_exprs_group *group = &state.groups[i];
 | |
|         struct hlsl_ir_node *args[HLSL_MAX_OPERANDS] = {0};
 | |
|         uint32_t swizzles[HLSL_MAX_OPERANDS] = {0};
 | |
|         struct hlsl_ir_node *arg, *combined;
 | |
|         unsigned int component_count = 0;
 | |
|         struct hlsl_type *combined_type;
 | |
|         struct hlsl_block new_block;
 | |
|         struct hlsl_ir_expr *expr;
 | |
| 
 | |
|         if (group->expr_count == 1)
 | |
|             continue;
 | |
| 
 | |
|         hlsl_block_init(&new_block);
 | |
| 
 | |
|         for (unsigned int j = 0; j < group->expr_count; ++j)
 | |
|         {
 | |
|             expr = group->exprs[j];
 | |
| 
 | |
|             for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
 | |
|             {
 | |
|                 uint32_t arg_swizzle;
 | |
| 
 | |
|                 if (!(arg = expr->operands[a].node))
 | |
|                     break;
 | |
| 
 | |
|                 if (arg->type == HLSL_IR_SWIZZLE)
 | |
|                     arg_swizzle = hlsl_ir_swizzle(arg)->u.vector;
 | |
|                 else
 | |
|                     arg_swizzle = HLSL_SWIZZLE(X, Y, Z, W);
 | |
| 
 | |
|                 /* Mask out the invalid components. */
 | |
|                 arg_swizzle &= (1u << VKD3D_SHADER_SWIZZLE_SHIFT(arg->data_type->e.numeric.dimx)) - 1;
 | |
|                 swizzles[a] |= arg_swizzle << VKD3D_SHADER_SWIZZLE_SHIFT(component_count);
 | |
|             }
 | |
| 
 | |
|             component_count += expr->node.data_type->e.numeric.dimx;
 | |
|         }
 | |
| 
 | |
|         expr = group->exprs[0];
 | |
|         for (unsigned int a = 0; a < HLSL_MAX_OPERANDS; ++a)
 | |
|         {
 | |
|             if (!(arg = expr->operands[a].node))
 | |
|                 break;
 | |
|             if (arg->type == HLSL_IR_SWIZZLE)
 | |
|                 arg = hlsl_ir_swizzle(arg)->val.node;
 | |
|             args[a] = hlsl_block_add_swizzle(ctx, &new_block, swizzles[a], component_count, arg, &arg->loc);
 | |
|         }
 | |
| 
 | |
|         combined_type = hlsl_get_vector_type(ctx, expr->node.data_type->e.numeric.type, component_count);
 | |
|         combined = hlsl_block_add_expr(ctx, &new_block, expr->op, args, combined_type, &expr->node.loc);
 | |
| 
 | |
|         list_move_before(&expr->node.entry, &new_block.instrs);
 | |
| 
 | |
|         TRACE("Combining %u %s instructions into %p.\n", group->expr_count,
 | |
|                 debug_hlsl_expr_op(group->exprs[0]->op), combined);
 | |
| 
 | |
|         component_count = 0;
 | |
|         for (unsigned int j = 0; j < group->expr_count; ++j)
 | |
|         {
 | |
|             struct hlsl_ir_node *replacement;
 | |
| 
 | |
|             expr = group->exprs[j];
 | |
| 
 | |
|             if (!(replacement = hlsl_new_swizzle(ctx,
 | |
|                     HLSL_SWIZZLE(X, Y, Z, W) >> VKD3D_SHADER_SWIZZLE_SHIFT(component_count),
 | |
|                     expr->node.data_type->e.numeric.dimx, combined, &expr->node.loc)))
 | |
|                 goto out;
 | |
|             component_count += expr->node.data_type->e.numeric.dimx;
 | |
|             list_add_before(&expr->node.entry, &replacement->entry);
 | |
|             hlsl_replace_node(&expr->node, replacement);
 | |
|         }
 | |
| 
 | |
|         progress = true;
 | |
|     }
 | |
| 
 | |
| out:
 | |
|     vkd3d_free(state.groups);
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| struct vectorize_stores_state
 | |
| {
 | |
|     struct vectorizable_stores_group
 | |
|     {
 | |
|         struct hlsl_block *block;
 | |
|         /* We handle overlapping stores, because it's not really easier not to.
 | |
|          * In theory, then, we could collect an arbitrary number of stores here.
 | |
|          *
 | |
|          * In practice, overlapping stores are unlikely, and of course at most
 | |
|          * 4 stores can appear without overlap. Therefore, for simplicity, we
 | |
|          * just use a fixed array of 4.
 | |
|          *
 | |
|          * Since computing the writemask requires traversing the deref, and we
 | |
|          * need to do that anyway, we store it here for convenience. */
 | |
|         struct hlsl_ir_store *stores[4];
 | |
|         unsigned int path_len;
 | |
|         uint8_t writemasks[4];
 | |
|         uint8_t store_count;
 | |
|         bool dirty;
 | |
|     } *groups;
 | |
|     size_t count, capacity;
 | |
| };
 | |
| 
 | |
| /* This must be a store to a subsection of a vector.
 | |
|  * In theory we can also vectorize stores to packed struct fields,
 | |
|  * but this requires target-specific knowledge and is probably best left
 | |
|  * to a VSIR pass. */
 | |
| static bool can_vectorize_store(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
 | |
|         unsigned int *path_len, uint8_t *writemask)
 | |
| {
 | |
|     struct hlsl_type *type = store->lhs.var->data_type;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (store->rhs.node->data_type->class > HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_SCALAR)
 | |
|         return false;
 | |
| 
 | |
|     for (i = 0; type->class != HLSL_CLASS_VECTOR && i < store->lhs.path_len; ++i)
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, store->lhs.path[i].node);
 | |
| 
 | |
|     if (type->class != HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
| 
 | |
|     *path_len = i;
 | |
| 
 | |
|     if (i < store->lhs.path_len)
 | |
|     {
 | |
|         struct hlsl_ir_constant *c;
 | |
| 
 | |
|         /* This is a store to a scalar component of a vector, achieved via
 | |
|          * indexing. */
 | |
| 
 | |
|         if (store->lhs.path[i].node->type != HLSL_IR_CONSTANT)
 | |
|             return false;
 | |
|         c = hlsl_ir_constant(store->lhs.path[i].node);
 | |
|         *writemask = (1u << c->value.u[0].u);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         *writemask = store->writemask;
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool derefs_are_same_vector(struct hlsl_ctx *ctx, const struct hlsl_deref *a, const struct hlsl_deref *b)
 | |
| {
 | |
|     struct hlsl_type *type = a->var->data_type;
 | |
| 
 | |
|     if (a->var != b->var)
 | |
|         return false;
 | |
| 
 | |
|     for (unsigned int i = 0; type->class != HLSL_CLASS_VECTOR && i < a->path_len && i < b->path_len; ++i)
 | |
|     {
 | |
|         if (a->path[i].node != b->path[i].node)
 | |
|             return false;
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, a->path[i].node);
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void record_vectorizable_store(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_store *store, struct vectorize_stores_state *state)
 | |
| {
 | |
|     unsigned int path_len;
 | |
|     uint8_t writemask;
 | |
| 
 | |
|     if (!can_vectorize_store(ctx, store, &path_len, &writemask))
 | |
|     {
 | |
|         /* In the case of a dynamically indexed vector, we must invalidate
 | |
|          * any groups that statically index the same vector.
 | |
|          * For the sake of expediency, we go one step further and invalidate
 | |
|          * any groups that store to the same variable.
 | |
|          * (We also don't check that that was the reason why this store isn't
 | |
|          * vectorizable.)
 | |
|          * We could be more granular, but we'll defer that until it comes
 | |
|          * up in practice. */
 | |
|         for (size_t i = 0; i < state->count; ++i)
 | |
|         {
 | |
|             if (state->groups[i].stores[0]->lhs.var == store->lhs.var)
 | |
|                 state->groups[i].dirty = true;
 | |
|         }
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     for (size_t i = 0; i < state->count; ++i)
 | |
|     {
 | |
|         struct vectorizable_stores_group *group = &state->groups[i];
 | |
|         struct hlsl_ir_store *other = group->stores[0];
 | |
| 
 | |
|         if (group->dirty)
 | |
|             continue;
 | |
| 
 | |
|         if (derefs_are_same_vector(ctx, &store->lhs, &other->lhs))
 | |
|         {
 | |
|             /* Stores must be in the same CFG block. If they're not,
 | |
|              * they're not executed in exactly the same flow, and
 | |
|              * therefore can't be vectorized. */
 | |
|             if (group->block == block
 | |
|                     && is_same_vectorizable_source(store->rhs.node, other->rhs.node))
 | |
|             {
 | |
|                 if (group->store_count < ARRAY_SIZE(group->stores))
 | |
|                 {
 | |
|                     group->stores[group->store_count] = store;
 | |
|                     group->writemasks[group->store_count] = writemask;
 | |
|                     ++group->store_count;
 | |
|                     return;
 | |
|                 }
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 /* A store to the same vector with a different source, or in
 | |
|                  * a different CFG block, invalidates any earlier store.
 | |
|                  *
 | |
|                  * A store to a component which *contains* the vector in
 | |
|                  * question would also invalidate, but we should have split all
 | |
|                  * of those by the time we get here. */
 | |
|                 group->dirty = true;
 | |
| 
 | |
|                 /* Note that we do exit this loop early if we find a store A we
 | |
|                  * can vectorize with, but that's fine. If there was a store B
 | |
|                  * also in the state that we can't vectorize with, it would
 | |
|                  * already have invalidated A. */
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             /* This could still be a store to the same vector, if e.g. the
 | |
|              * vector is part of a dynamically indexed array, or the path has
 | |
|              * two equivalent instructions which refer to the same component.
 | |
|              * [CSE may help with the latter, but we don't have it yet,
 | |
|              * and we shouldn't depend on it anyway.]
 | |
|              * For the sake of expediency, we just invalidate it if it refers
 | |
|              * to the same variable at all.
 | |
|              * As above, we could be more granular, but we'll defer that until
 | |
|              * it comes up in practice. */
 | |
|             if (store->lhs.var == other->lhs.var)
 | |
|                 group->dirty = true;
 | |
| 
 | |
|             /* As above, we don't need to worry about exiting the loop early. */
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (!hlsl_array_reserve(ctx, (void **)&state->groups,
 | |
|             &state->capacity, state->count + 1, sizeof(*state->groups)))
 | |
|         return;
 | |
|     state->groups[state->count].block = block;
 | |
|     state->groups[state->count].stores[0] = store;
 | |
|     state->groups[state->count].path_len = path_len;
 | |
|     state->groups[state->count].writemasks[0] = writemask;
 | |
|     state->groups[state->count].store_count = 1;
 | |
|     state->groups[state->count].dirty = false;
 | |
|     ++state->count;
 | |
| }
 | |
| 
 | |
| static void mark_store_groups_dirty(struct hlsl_ctx *ctx,
 | |
|         struct vectorize_stores_state *state, struct hlsl_ir_var *var)
 | |
| {
 | |
|     for (unsigned int i = 0; i < state->count; ++i)
 | |
|     {
 | |
|         if (state->groups[i].stores[0]->lhs.var == var)
 | |
|             state->groups[i].dirty = true;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void find_vectorizable_store_groups(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct vectorize_stores_state *state)
 | |
| {
 | |
|     struct hlsl_ir_node *instr;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->type == HLSL_IR_STORE)
 | |
|         {
 | |
|             record_vectorizable_store(ctx, block, hlsl_ir_store(instr), state);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_LOAD)
 | |
|         {
 | |
|             /* By vectorizing store A with store B, we are effectively moving
 | |
|              * store A down to happen at the same time as store B.
 | |
|              * If there was a load of the same variable between the two, this
 | |
|              * would be incorrect.
 | |
|              * Therefore invalidate all stores to this variable. As above, we
 | |
|              * could be more granular if necessary. */
 | |
|             mark_store_groups_dirty(ctx, state, hlsl_ir_load(instr)->src.var);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_INTERLOCKED)
 | |
|         {
 | |
|             /* An interlocked operation can be used on shared memory variables,
 | |
|              * and it is at the same time both a store and a load, thus, we
 | |
|              * should also mark all stores to this variable as dirty once we
 | |
|              * find one.*/
 | |
|             mark_store_groups_dirty(ctx, state, hlsl_ir_interlocked(instr)->dst.var);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_IF)
 | |
|         {
 | |
|             struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
| 
 | |
|             find_vectorizable_store_groups(ctx, &iff->then_block, state);
 | |
|             find_vectorizable_store_groups(ctx, &iff->else_block, state);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_LOOP)
 | |
|         {
 | |
|             find_vectorizable_store_groups(ctx, &hlsl_ir_loop(instr)->body, state);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_SWITCH)
 | |
|         {
 | |
|             struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|             struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|             LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|                 find_vectorizable_store_groups(ctx, &c->body, state);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Combine sequences like
 | |
|  *
 | |
|  * 2: @1.yw
 | |
|  * 3: @1.zy
 | |
|  * 4: var.xy = @2
 | |
|  * 5: var.yw = @3
 | |
|  *
 | |
|  * to
 | |
|  *
 | |
|  * 2: @1.yzy
 | |
|  * 5: var.xyw = @2
 | |
|  *
 | |
|  * There are a lot of gotchas here. We need to make sure the two stores are to
 | |
|  * the same vector (which may be embedded in a complex variable), that they're
 | |
|  * always executed in the same control flow, and that there aren't any other
 | |
|  * stores or loads on the same vector in the middle. */
 | |
| static bool vectorize_stores(struct hlsl_ctx *ctx, struct hlsl_block *block)
 | |
| {
 | |
|     struct vectorize_stores_state state = {0};
 | |
|     bool progress = false;
 | |
| 
 | |
|     find_vectorizable_store_groups(ctx, block, &state);
 | |
| 
 | |
|     for (unsigned int i = 0; i < state.count; ++i)
 | |
|     {
 | |
|         struct vectorizable_stores_group *group = &state.groups[i];
 | |
|         uint32_t new_swizzle = 0, new_writemask = 0;
 | |
|         struct hlsl_ir_node *new_rhs, *value;
 | |
|         uint32_t swizzle_components[4];
 | |
|         unsigned int component_count;
 | |
|         struct hlsl_ir_store *store;
 | |
|         struct hlsl_block new_block;
 | |
| 
 | |
|         if (group->store_count == 1)
 | |
|             continue;
 | |
| 
 | |
|         hlsl_block_init(&new_block);
 | |
| 
 | |
|         /* Compute the swizzle components. */
 | |
|         for (unsigned int j = 0; j < group->store_count; ++j)
 | |
|         {
 | |
|             unsigned int writemask = group->writemasks[j];
 | |
|             uint32_t rhs_swizzle;
 | |
| 
 | |
|             store = group->stores[j];
 | |
| 
 | |
|             if (store->rhs.node->type == HLSL_IR_SWIZZLE)
 | |
|                 rhs_swizzle = hlsl_ir_swizzle(store->rhs.node)->u.vector;
 | |
|             else
 | |
|                 rhs_swizzle = HLSL_SWIZZLE(X, Y, Z, W);
 | |
| 
 | |
|             component_count = 0;
 | |
|             for (unsigned int k = 0; k < 4; ++k)
 | |
|             {
 | |
|                 if (writemask & (1u << k))
 | |
|                     swizzle_components[k] = hlsl_swizzle_get_component(rhs_swizzle, component_count++);
 | |
|             }
 | |
| 
 | |
|             new_writemask |= writemask;
 | |
|         }
 | |
| 
 | |
|         /* Construct the new swizzle. */
 | |
|         component_count = 0;
 | |
|         for (unsigned int k = 0; k < 4; ++k)
 | |
|         {
 | |
|             if (new_writemask & (1u << k))
 | |
|                 hlsl_swizzle_set_component(&new_swizzle, component_count++, swizzle_components[k]);
 | |
|         }
 | |
| 
 | |
|         store = group->stores[0];
 | |
|         value = store->rhs.node;
 | |
|         if (value->type == HLSL_IR_SWIZZLE)
 | |
|             value = hlsl_ir_swizzle(value)->val.node;
 | |
| 
 | |
|         new_rhs = hlsl_block_add_swizzle(ctx, &new_block, new_swizzle, component_count, value, &value->loc);
 | |
|         hlsl_block_add_store_parent(ctx, &new_block, &store->lhs,
 | |
|                 group->path_len, new_rhs, new_writemask, &store->node.loc);
 | |
| 
 | |
|         TRACE("Combining %u stores to %s.\n", group->store_count, store->lhs.var->name);
 | |
| 
 | |
|         list_move_before(&group->stores[group->store_count - 1]->node.entry, &new_block.instrs);
 | |
| 
 | |
|         for (unsigned int j = 0; j < group->store_count; ++j)
 | |
|         {
 | |
|             list_remove(&group->stores[j]->node.entry);
 | |
|             hlsl_free_instr(&group->stores[j]->node);
 | |
|         }
 | |
| 
 | |
|         progress = true;
 | |
|     }
 | |
| 
 | |
|     vkd3d_free(state.groups);
 | |
|     return progress;
 | |
| }
 | |
| 
 | |
| static enum validation_result validate_component_index_range_from_deref(struct hlsl_ctx *ctx,
 | |
|         const struct hlsl_deref *deref)
 | |
| {
 | |
|     struct hlsl_type *type = deref->var->data_type;
 | |
|     unsigned int i;
 | |
| 
 | |
|     for (i = 0; i < deref->path_len; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *path_node = deref->path[i].node;
 | |
|         unsigned int idx = 0;
 | |
| 
 | |
|         VKD3D_ASSERT(path_node);
 | |
|         if (path_node->type != HLSL_IR_CONSTANT)
 | |
|             return DEREF_VALIDATION_NOT_CONSTANT;
 | |
| 
 | |
|         /* We should always have generated a cast to UINT. */
 | |
|         VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT);
 | |
| 
 | |
|         idx = hlsl_ir_constant(path_node)->value.u[0].u;
 | |
| 
 | |
|         switch (type->class)
 | |
|         {
 | |
|             case HLSL_CLASS_VECTOR:
 | |
|                 if (idx >= type->e.numeric.dimx)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
 | |
|                             "Vector index is out of bounds. %u/%u", idx, type->e.numeric.dimx);
 | |
|                     return DEREF_VALIDATION_OUT_OF_BOUNDS;
 | |
|                 }
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_CLASS_MATRIX:
 | |
|                 if (idx >= hlsl_type_major_size(type))
 | |
|                 {
 | |
|                     hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
 | |
|                             "Matrix index is out of bounds. %u/%u", idx, hlsl_type_major_size(type));
 | |
|                     return DEREF_VALIDATION_OUT_OF_BOUNDS;
 | |
|                 }
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_CLASS_ARRAY:
 | |
|                 if (idx >= type->e.array.elements_count)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &path_node->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
 | |
|                             "Array index is out of bounds. %u/%u", idx, type->e.array.elements_count);
 | |
|                     return DEREF_VALIDATION_OUT_OF_BOUNDS;
 | |
|                 }
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_CLASS_STRUCT:
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 vkd3d_unreachable();
 | |
|         }
 | |
| 
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, path_node);
 | |
|     }
 | |
| 
 | |
|     return DEREF_VALIDATION_OK;
 | |
| }
 | |
| 
 | |
| static void note_non_static_deref_expressions(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
 | |
|         const char *usage)
 | |
| {
 | |
|     unsigned int i;
 | |
| 
 | |
|     for (i = 0; i < deref->path_len; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *path_node = deref->path[i].node;
 | |
| 
 | |
|         VKD3D_ASSERT(path_node);
 | |
|         if (path_node->type != HLSL_IR_CONSTANT)
 | |
|             hlsl_note(ctx, &path_node->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                     "Expression for %s within \"%s\" cannot be resolved statically.",
 | |
|                     usage, deref->var->name);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool validate_dereferences(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
 | |
|         void *context)
 | |
| {
 | |
|     switch (instr->type)
 | |
|     {
 | |
|         case HLSL_IR_RESOURCE_LOAD:
 | |
|         {
 | |
|             struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
 | |
| 
 | |
|             if (!load->resource.var->is_uniform)
 | |
|             {
 | |
|                 hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                         "Loaded resource must have a single uniform source.");
 | |
|             }
 | |
|             else if (validate_component_index_range_from_deref(ctx, &load->resource) == DEREF_VALIDATION_NOT_CONSTANT)
 | |
|             {
 | |
|                 hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                         "Loaded resource from \"%s\" must be determinable at compile time.",
 | |
|                         load->resource.var->name);
 | |
|                 note_non_static_deref_expressions(ctx, &load->resource, "loaded resource");
 | |
|             }
 | |
| 
 | |
|             if (load->sampler.var)
 | |
|             {
 | |
|                 if (!load->sampler.var->is_uniform)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                             "Resource load sampler must have a single uniform source.");
 | |
|                 }
 | |
|                 else if (validate_component_index_range_from_deref(ctx, &load->sampler) == DEREF_VALIDATION_NOT_CONSTANT)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                             "Resource load sampler from \"%s\" must be determinable at compile time.",
 | |
|                             load->sampler.var->name);
 | |
|                     note_non_static_deref_expressions(ctx, &load->sampler, "resource load sampler");
 | |
|                 }
 | |
|             }
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_RESOURCE_STORE:
 | |
|         {
 | |
|             struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
 | |
| 
 | |
|             if (!store->resource.var->is_uniform)
 | |
|             {
 | |
|                 hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                         "Accessed resource must have a single uniform source.");
 | |
|             }
 | |
|             else if (validate_component_index_range_from_deref(ctx, &store->resource) == DEREF_VALIDATION_NOT_CONSTANT)
 | |
|             {
 | |
|                 hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                         "Accessed resource from \"%s\" must be determinable at compile time.",
 | |
|                         store->resource.var->name);
 | |
|                 note_non_static_deref_expressions(ctx, &store->resource, "accessed resource");
 | |
|             }
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_LOAD:
 | |
|         {
 | |
|             struct hlsl_ir_load *load = hlsl_ir_load(instr);
 | |
|             validate_component_index_range_from_deref(ctx, &load->src);
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_STORE:
 | |
|         {
 | |
|             struct hlsl_ir_store *store = hlsl_ir_store(instr);
 | |
|             validate_component_index_range_from_deref(ctx, &store->lhs);
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_INTERLOCKED:
 | |
|         {
 | |
|             struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
 | |
| 
 | |
|             if (!interlocked->dst.var->is_uniform)
 | |
|             {
 | |
|                 hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                         "Accessed resource must have a single uniform source.");
 | |
|             }
 | |
|             else if (validate_component_index_range_from_deref(ctx, &interlocked->dst) == DEREF_VALIDATION_NOT_CONSTANT)
 | |
|             {
 | |
|                 hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NON_STATIC_OBJECT_REF,
 | |
|                         "Accessed resource from \"%s\" must be determinable at compile time.",
 | |
|                         interlocked->dst.var->name);
 | |
|                 note_non_static_deref_expressions(ctx, &interlocked->dst, "accessed resource");
 | |
|             }
 | |
|             break;
 | |
|         }
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool fold_redundant_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     if (instr->type == HLSL_IR_EXPR)
 | |
|     {
 | |
|         struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
 | |
|         const struct hlsl_type *dst_type = expr->node.data_type;
 | |
|         const struct hlsl_type *src_type;
 | |
| 
 | |
|         if (expr->op != HLSL_OP1_CAST)
 | |
|             return false;
 | |
| 
 | |
|         src_type = expr->operands[0].node->data_type;
 | |
| 
 | |
|         if (hlsl_types_are_equal(src_type, dst_type)
 | |
|                 || (src_type->e.numeric.type == dst_type->e.numeric.type
 | |
|                 && hlsl_is_vec1(src_type) && hlsl_is_vec1(dst_type)))
 | |
|         {
 | |
|             hlsl_replace_node(&expr->node, expr->operands[0].node);
 | |
|             return true;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| /* Copy an element of a complex variable. Helper for
 | |
|  * split_array_copies(), split_struct_copies() and
 | |
|  * split_matrix_copies(). Inserts new instructions right before
 | |
|  * "store". */
 | |
| static bool split_copy(struct hlsl_ctx *ctx, struct hlsl_ir_store *store,
 | |
|         const struct hlsl_ir_load *load, const unsigned int idx, struct hlsl_type *type)
 | |
| {
 | |
|     struct hlsl_ir_node *split_store, *c;
 | |
|     struct hlsl_ir_load *split_load;
 | |
| 
 | |
|     if (!(c = hlsl_new_uint_constant(ctx, idx, &store->node.loc)))
 | |
|         return false;
 | |
|     list_add_before(&store->node.entry, &c->entry);
 | |
| 
 | |
|     if (!(split_load = hlsl_new_load_index(ctx, &load->src, c, &store->node.loc)))
 | |
|         return false;
 | |
|     list_add_before(&store->node.entry, &split_load->node.entry);
 | |
| 
 | |
|     if (!(split_store = hlsl_new_store_index(ctx, &store->lhs, c, &split_load->node, 0, &store->node.loc)))
 | |
|         return false;
 | |
|     list_add_before(&store->node.entry, &split_store->entry);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool split_array_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     const struct hlsl_ir_node *rhs;
 | |
|     struct hlsl_type *element_type;
 | |
|     const struct hlsl_type *type;
 | |
|     struct hlsl_ir_store *store;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_STORE)
 | |
|         return false;
 | |
| 
 | |
|     store = hlsl_ir_store(instr);
 | |
|     rhs = store->rhs.node;
 | |
|     type = rhs->data_type;
 | |
|     if (type->class != HLSL_CLASS_ARRAY)
 | |
|         return false;
 | |
|     element_type = type->e.array.type;
 | |
| 
 | |
|     if (rhs->type != HLSL_IR_LOAD)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Array store rhs is not HLSL_IR_LOAD. Broadcast may be missing.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < type->e.array.elements_count; ++i)
 | |
|     {
 | |
|         if (!split_copy(ctx, store, hlsl_ir_load(rhs), i, element_type))
 | |
|             return false;
 | |
|     }
 | |
| 
 | |
|     /* Remove the store instruction, so that we can split structs which contain
 | |
|      * other structs. Although assignments produce a value, we don't allow
 | |
|      * HLSL_IR_STORE to be used as a source. */
 | |
|     list_remove(&store->node.entry);
 | |
|     hlsl_free_instr(&store->node);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool split_struct_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     const struct hlsl_ir_node *rhs;
 | |
|     const struct hlsl_type *type;
 | |
|     struct hlsl_ir_store *store;
 | |
|     size_t i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_STORE)
 | |
|         return false;
 | |
| 
 | |
|     store = hlsl_ir_store(instr);
 | |
|     rhs = store->rhs.node;
 | |
|     type = rhs->data_type;
 | |
|     if (type->class != HLSL_CLASS_STRUCT)
 | |
|         return false;
 | |
| 
 | |
|     if (rhs->type != HLSL_IR_LOAD)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Struct store rhs is not HLSL_IR_LOAD. Broadcast may be missing.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < type->e.record.field_count; ++i)
 | |
|     {
 | |
|         const struct hlsl_struct_field *field = &type->e.record.fields[i];
 | |
| 
 | |
|         if (!split_copy(ctx, store, hlsl_ir_load(rhs), i, field->type))
 | |
|             return false;
 | |
|     }
 | |
| 
 | |
|     /* Remove the store instruction, so that we can split structs which contain
 | |
|      * other structs. Although assignments produce a value, we don't allow
 | |
|      * HLSL_IR_STORE to be used as a source. */
 | |
|     list_remove(&store->node.entry);
 | |
|     hlsl_free_instr(&store->node);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| struct stream_append_ctx
 | |
| {
 | |
|     struct hlsl_ir_function_decl *func;
 | |
|     bool created;
 | |
| };
 | |
| 
 | |
| static bool lower_stream_appends(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct stream_append_ctx *append_ctx = context;
 | |
|     struct hlsl_ir_resource_store *store;
 | |
|     const struct hlsl_ir_node *rhs;
 | |
|     const struct hlsl_type *type;
 | |
|     struct hlsl_ir_var *var;
 | |
|     struct hlsl_block block;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_RESOURCE_STORE)
 | |
|         return false;
 | |
| 
 | |
|     store = hlsl_ir_resource_store(instr);
 | |
|     if (store->store_type != HLSL_RESOURCE_STREAM_APPEND)
 | |
|         return false;
 | |
| 
 | |
|     rhs = store->value.node;
 | |
|     var = store->resource.var;
 | |
|     type = hlsl_get_stream_output_type(var->data_type);
 | |
| 
 | |
|     if (rhs->type != HLSL_IR_LOAD)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Stream append rhs is not HLSL_IR_LOAD. Broadcast may be missing.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     VKD3D_ASSERT(var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated);
 | |
| 
 | |
|     if (var->regs[HLSL_REGSET_STREAM_OUTPUTS].index)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Append to an output stream with a nonzero stream index.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
| 
 | |
|     append_output_copy_recurse(ctx, &block, append_ctx->func, type->e.so.type, hlsl_ir_load(rhs), var->storage_modifiers,
 | |
|             &var->semantic, var->semantic.index, false, !append_ctx->created);
 | |
|     append_ctx->created = true;
 | |
| 
 | |
|     list_move_before(&instr->entry, &block.instrs);
 | |
|     hlsl_src_remove(&store->value);
 | |
| 
 | |
|     return true;
 | |
| 
 | |
| }
 | |
| 
 | |
| static bool split_matrix_copies(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     const struct hlsl_ir_node *rhs;
 | |
|     struct hlsl_type *element_type;
 | |
|     const struct hlsl_type *type;
 | |
|     unsigned int i;
 | |
|     struct hlsl_ir_store *store;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_STORE)
 | |
|         return false;
 | |
| 
 | |
|     store = hlsl_ir_store(instr);
 | |
|     rhs = store->rhs.node;
 | |
|     type = rhs->data_type;
 | |
|     if (type->class != HLSL_CLASS_MATRIX)
 | |
|         return false;
 | |
|     element_type = hlsl_get_vector_type(ctx, type->e.numeric.type, hlsl_type_minor_size(type));
 | |
| 
 | |
|     if (rhs->type != HLSL_IR_LOAD)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Copying from unsupported node type.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < hlsl_type_major_size(type); ++i)
 | |
|     {
 | |
|         if (!split_copy(ctx, store, hlsl_ir_load(rhs), i, element_type))
 | |
|             return false;
 | |
|     }
 | |
| 
 | |
|     list_remove(&store->node.entry);
 | |
|     hlsl_free_instr(&store->node);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_narrowing_casts(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     const struct hlsl_type *src_type, *dst_type;
 | |
|     struct hlsl_type *dst_vector_type;
 | |
|     struct hlsl_ir_expr *cast;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     cast = hlsl_ir_expr(instr);
 | |
|     if (cast->op != HLSL_OP1_CAST)
 | |
|         return false;
 | |
|     src_type = cast->operands[0].node->data_type;
 | |
|     dst_type = cast->node.data_type;
 | |
| 
 | |
|     if (src_type->class <= HLSL_CLASS_VECTOR && dst_type->class <= HLSL_CLASS_VECTOR
 | |
|             && dst_type->e.numeric.dimx < src_type->e.numeric.dimx)
 | |
|     {
 | |
|         struct hlsl_ir_node *new_cast;
 | |
| 
 | |
|         dst_vector_type = hlsl_get_vector_type(ctx, dst_type->e.numeric.type, src_type->e.numeric.dimx);
 | |
|         /* We need to preserve the cast since it might be doing more than just
 | |
|          * narrowing the vector. */
 | |
|         new_cast = hlsl_block_add_cast(ctx, block, cast->operands[0].node, dst_vector_type, &cast->node.loc);
 | |
|         hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, Y, Z, W),
 | |
|                 dst_type->e.numeric.dimx, new_cast, &cast->node.loc);
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool fold_swizzle_chains(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_swizzle *swizzle;
 | |
|     struct hlsl_ir_node *next_instr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_SWIZZLE)
 | |
|         return false;
 | |
|     swizzle = hlsl_ir_swizzle(instr);
 | |
| 
 | |
|     next_instr = swizzle->val.node;
 | |
| 
 | |
|     if (next_instr->type == HLSL_IR_SWIZZLE)
 | |
|     {
 | |
|         struct hlsl_ir_node *new_swizzle;
 | |
|         uint32_t combined_swizzle;
 | |
| 
 | |
|         combined_swizzle = hlsl_combine_swizzles(hlsl_ir_swizzle(next_instr)->u.vector,
 | |
|                 swizzle->u.vector, instr->data_type->e.numeric.dimx);
 | |
|         next_instr = hlsl_ir_swizzle(next_instr)->val.node;
 | |
| 
 | |
|         if (!(new_swizzle = hlsl_new_swizzle(ctx, combined_swizzle,
 | |
|                 instr->data_type->e.numeric.dimx, next_instr, &instr->loc)))
 | |
|             return false;
 | |
| 
 | |
|         list_add_before(&instr->entry, &new_swizzle->entry);
 | |
|         hlsl_replace_node(instr, new_swizzle);
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool remove_trivial_swizzles(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_swizzle *swizzle;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_SWIZZLE)
 | |
|         return false;
 | |
|     swizzle = hlsl_ir_swizzle(instr);
 | |
| 
 | |
|     if (instr->data_type->e.numeric.dimx != swizzle->val.node->data_type->e.numeric.dimx)
 | |
|         return false;
 | |
| 
 | |
|     for (i = 0; i < instr->data_type->e.numeric.dimx; ++i)
 | |
|         if (hlsl_swizzle_get_component(swizzle->u.vector, i) != i)
 | |
|             return false;
 | |
| 
 | |
|     hlsl_replace_node(instr, swizzle->val.node);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool remove_trivial_conditional_branches(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_constant *condition;
 | |
|     struct hlsl_ir_if *iff;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_IF)
 | |
|         return false;
 | |
|     iff = hlsl_ir_if(instr);
 | |
|     if (iff->condition.node->type != HLSL_IR_CONSTANT)
 | |
|         return false;
 | |
|     condition = hlsl_ir_constant(iff->condition.node);
 | |
| 
 | |
|     list_move_before(&instr->entry, condition->value.u[0].u ? &iff->then_block.instrs : &iff->else_block.instrs);
 | |
|     list_remove(&instr->entry);
 | |
|     hlsl_free_instr(instr);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool normalize_switch_cases(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_switch_case *c, *def = NULL;
 | |
|     bool missing_terminal_break = false;
 | |
|     struct hlsl_ir_node *node;
 | |
|     struct hlsl_ir_switch *s;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_SWITCH)
 | |
|         return false;
 | |
|     s = hlsl_ir_switch(instr);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|     {
 | |
|         bool terminal_break = false;
 | |
| 
 | |
|         if (list_empty(&c->body.instrs))
 | |
|         {
 | |
|             terminal_break = !!list_next(&s->cases, &c->entry);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             node = LIST_ENTRY(list_tail(&c->body.instrs), struct hlsl_ir_node, entry);
 | |
|             if (node->type == HLSL_IR_JUMP)
 | |
|                 terminal_break = (hlsl_ir_jump(node)->type == HLSL_IR_JUMP_BREAK);
 | |
|         }
 | |
| 
 | |
|         missing_terminal_break |= !terminal_break;
 | |
| 
 | |
|         if (!terminal_break)
 | |
|         {
 | |
|             if (c->is_default)
 | |
|             {
 | |
|                 hlsl_error(ctx, &c->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
 | |
|                         "The 'default' case block is not terminated with 'break' or 'return'.");
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 hlsl_error(ctx, &c->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SYNTAX,
 | |
|                         "Switch case block '%u' is not terminated with 'break' or 'return'.", c->value);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (missing_terminal_break)
 | |
|         return true;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|     {
 | |
|         if (c->is_default)
 | |
|         {
 | |
|             def = c;
 | |
| 
 | |
|             /* Remove preceding empty cases. */
 | |
|             while (list_prev(&s->cases, &def->entry))
 | |
|             {
 | |
|                 c = LIST_ENTRY(list_prev(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry);
 | |
|                 if (!list_empty(&c->body.instrs))
 | |
|                     break;
 | |
|                 hlsl_free_ir_switch_case(c);
 | |
|             }
 | |
| 
 | |
|             if (list_empty(&def->body.instrs))
 | |
|             {
 | |
|                 /* Remove following empty cases. */
 | |
|                 while (list_next(&s->cases, &def->entry))
 | |
|                 {
 | |
|                     c = LIST_ENTRY(list_next(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry);
 | |
|                     if (!list_empty(&c->body.instrs))
 | |
|                         break;
 | |
|                     hlsl_free_ir_switch_case(c);
 | |
|                 }
 | |
| 
 | |
|                 /* Merge with the next case. */
 | |
|                 if (list_next(&s->cases, &def->entry))
 | |
|                 {
 | |
|                     c = LIST_ENTRY(list_next(&s->cases, &def->entry), struct hlsl_ir_switch_case, entry);
 | |
|                     c->is_default = true;
 | |
|                     hlsl_free_ir_switch_case(def);
 | |
|                     def = c;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (def)
 | |
|     {
 | |
|         list_remove(&def->entry);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         if (!(def = hlsl_new_switch_case(ctx, 0, true, NULL, &s->node.loc)))
 | |
|             return true;
 | |
|         hlsl_block_add_jump(ctx, &def->body, HLSL_IR_JUMP_BREAK, NULL, &s->node.loc);
 | |
|     }
 | |
|     list_add_tail(&s->cases, &def->entry);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_nonconstant_vector_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *idx;
 | |
|     struct hlsl_deref *deref;
 | |
|     struct hlsl_type *type;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_LOAD)
 | |
|         return false;
 | |
| 
 | |
|     deref = &hlsl_ir_load(instr)->src;
 | |
|     VKD3D_ASSERT(deref->var);
 | |
| 
 | |
|     if (deref->path_len == 0)
 | |
|         return false;
 | |
| 
 | |
|     type = deref->var->data_type;
 | |
|     for (i = 0; i < deref->path_len - 1; ++i)
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node);
 | |
| 
 | |
|     idx = deref->path[deref->path_len - 1].node;
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT)
 | |
|     {
 | |
|         struct hlsl_ir_node *eq, *swizzle, *c, *operands[HLSL_MAX_OPERANDS] = {0};
 | |
|         unsigned int width = type->e.numeric.dimx;
 | |
|         struct hlsl_constant_value value;
 | |
|         struct hlsl_ir_load *vector_load;
 | |
|         enum hlsl_ir_expr_op op;
 | |
| 
 | |
|         if (!(vector_load = hlsl_new_load_parent(ctx, deref, &instr->loc)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, &vector_load->node);
 | |
| 
 | |
|         swizzle = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X), width, idx, &instr->loc);
 | |
| 
 | |
|         value.u[0].u = 0;
 | |
|         value.u[1].u = 1;
 | |
|         value.u[2].u = 2;
 | |
|         value.u[3].u = 3;
 | |
|         if (!(c = hlsl_new_constant(ctx, hlsl_get_vector_type(ctx, HLSL_TYPE_UINT, width), &value, &instr->loc)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, c);
 | |
| 
 | |
|         operands[0] = swizzle;
 | |
|         operands[1] = c;
 | |
|         eq = hlsl_block_add_expr(ctx, block, HLSL_OP2_EQUAL, operands,
 | |
|                 hlsl_get_vector_type(ctx, HLSL_TYPE_BOOL, width), &instr->loc);
 | |
|         eq = hlsl_block_add_cast(ctx, block, eq, type, &instr->loc);
 | |
| 
 | |
|         op = HLSL_OP2_DOT;
 | |
|         if (width == 1)
 | |
|             op = type->e.numeric.type == HLSL_TYPE_BOOL ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL;
 | |
| 
 | |
|         /* Note: We may be creating a DOT for bool vectors here, which we need to lower to
 | |
|          * LOGIC_OR + LOGIC_AND. */
 | |
|         operands[0] = &vector_load->node;
 | |
|         operands[1] = eq;
 | |
|         hlsl_block_add_expr(ctx, block, op, operands, instr->data_type, &instr->loc);
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool validate_nonconstant_vector_store_derefs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *idx;
 | |
|     struct hlsl_deref *deref;
 | |
|     struct hlsl_type *type;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_STORE)
 | |
|         return false;
 | |
| 
 | |
|     deref = &hlsl_ir_store(instr)->lhs;
 | |
|     VKD3D_ASSERT(deref->var);
 | |
| 
 | |
|     if (deref->path_len == 0)
 | |
|         return false;
 | |
| 
 | |
|     type = deref->var->data_type;
 | |
|     for (i = 0; i < deref->path_len - 1; ++i)
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, deref->path[i].node);
 | |
| 
 | |
|     idx = deref->path[deref->path_len - 1].node;
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_VECTOR && idx->type != HLSL_IR_CONSTANT)
 | |
|     {
 | |
|         /* We should turn this into an hlsl_error after we implement unrolling, because if we get
 | |
|          * here after that, it means that the HLSL is invalid. */
 | |
|         hlsl_fixme(ctx, &instr->loc, "Non-constant vector addressing on store. Unrolling may be missing.");
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool deref_supports_sm1_indirect_addressing(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
 | |
| {
 | |
|     return ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX && deref->var->is_uniform;
 | |
| }
 | |
| 
 | |
| /* This pass flattens array (and row_major matrix) loads that include the indexing of a non-constant
 | |
|  * index into multiple constant loads, where the value of only one of them ends up in the resulting
 | |
|  * node.
 | |
|  * This is achieved through a synthetic variable. The non-constant index is compared for equality
 | |
|  * with every possible value it can have within the array bounds, and the ternary operator is used
 | |
|  * to update the value of the synthetic var when the equality check passes. */
 | |
| static bool lower_nonconstant_array_loads(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
 | |
|         struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_constant_value zero_value = {0};
 | |
|     struct hlsl_ir_node *cut_index, *zero;
 | |
|     unsigned int i, i_cut, element_count;
 | |
|     const struct hlsl_deref *deref;
 | |
|     struct hlsl_type *cut_type;
 | |
|     struct hlsl_ir_load *load;
 | |
|     struct hlsl_ir_var *var;
 | |
|     bool row_major;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_LOAD)
 | |
|         return false;
 | |
|     load = hlsl_ir_load(instr);
 | |
|     deref = &load->src;
 | |
| 
 | |
|     if (deref->path_len == 0)
 | |
|         return false;
 | |
| 
 | |
|     if (deref_supports_sm1_indirect_addressing(ctx, deref))
 | |
|         return false;
 | |
| 
 | |
|     for (i = deref->path_len - 1; ; --i)
 | |
|     {
 | |
|         if (deref->path[i].node->type != HLSL_IR_CONSTANT)
 | |
|         {
 | |
|             i_cut = i;
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         if (i == 0)
 | |
|             return false;
 | |
|     }
 | |
| 
 | |
|     cut_index = deref->path[i_cut].node;
 | |
|     cut_type = deref->var->data_type;
 | |
|     for (i = 0; i < i_cut; ++i)
 | |
|         cut_type = hlsl_get_element_type_from_path_index(ctx, cut_type, deref->path[i].node);
 | |
| 
 | |
|     row_major = hlsl_type_is_row_major(cut_type);
 | |
|     VKD3D_ASSERT(cut_type->class == HLSL_CLASS_ARRAY || row_major);
 | |
| 
 | |
|     if (!(var = hlsl_new_synthetic_var(ctx, row_major ? "row_major-load" : "array-load", instr->data_type, &instr->loc)))
 | |
|         return false;
 | |
| 
 | |
|     if (!(zero = hlsl_new_constant(ctx, instr->data_type, &zero_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, zero);
 | |
| 
 | |
|     hlsl_block_add_simple_store(ctx, block, var, zero);
 | |
| 
 | |
|     TRACE("Lowering non-constant %s load on variable '%s'.\n", row_major ? "row_major" : "array", deref->var->name);
 | |
| 
 | |
|     element_count = hlsl_type_element_count(cut_type);
 | |
|     for (i = 0; i < element_count; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *const_i, *equals, *ternary, *specific_load, *var_load;
 | |
|         struct hlsl_type *btype = hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL);
 | |
|         struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
 | |
|         struct hlsl_deref deref_copy = {0};
 | |
| 
 | |
|         const_i = hlsl_block_add_uint_constant(ctx, block, i, &cut_index->loc);
 | |
| 
 | |
|         operands[0] = cut_index;
 | |
|         operands[1] = const_i;
 | |
|         equals = hlsl_block_add_expr(ctx, block, HLSL_OP2_EQUAL, operands, btype, &cut_index->loc);
 | |
|         equals = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X),
 | |
|                 var->data_type->e.numeric.dimx, equals, &cut_index->loc);
 | |
| 
 | |
|         var_load = hlsl_block_add_simple_load(ctx, block, var, &cut_index->loc);
 | |
| 
 | |
|         if (!hlsl_copy_deref(ctx, &deref_copy, deref))
 | |
|             return false;
 | |
|         hlsl_src_remove(&deref_copy.path[i_cut]);
 | |
|         hlsl_src_from_node(&deref_copy.path[i_cut], const_i);
 | |
|         specific_load = hlsl_block_add_load_index(ctx, block, &deref_copy, NULL, &cut_index->loc);
 | |
|         hlsl_cleanup_deref(&deref_copy);
 | |
| 
 | |
|         operands[0] = equals;
 | |
|         operands[1] = specific_load;
 | |
|         operands[2] = var_load;
 | |
|         ternary = hlsl_block_add_expr(ctx, block, HLSL_OP3_TERNARY, operands, instr->data_type, &cut_index->loc);
 | |
| 
 | |
|         hlsl_block_add_simple_store(ctx, block, var, ternary);
 | |
|     }
 | |
| 
 | |
|     hlsl_block_add_simple_load(ctx, block, var, &instr->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static struct hlsl_type *clone_texture_array_as_combined_sampler_array(struct hlsl_ctx *ctx, struct hlsl_type *type)
 | |
| {
 | |
|     struct hlsl_type *sampler_type;
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_ARRAY)
 | |
|     {
 | |
|         if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, type->e.array.type)))
 | |
|             return NULL;
 | |
| 
 | |
|         return hlsl_new_array_type(ctx, sampler_type, type->e.array.elements_count, HLSL_ARRAY_GENERIC);
 | |
|     }
 | |
| 
 | |
|     return ctx->builtin_types.sampler[type->sampler_dim];
 | |
| }
 | |
| 
 | |
| static bool deref_offset_is_zero(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
 | |
| {
 | |
|     enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
 | |
|     unsigned int index;
 | |
| 
 | |
|     if (!hlsl_regset_index_from_deref(ctx, deref, regset, &index))
 | |
|         return false;
 | |
|     return index == 0;
 | |
| }
 | |
| 
 | |
| /* Lower samples from separate texture and sampler variables to samples from
 | |
|  * synthetized combined samplers. That is, translate SM4-style samples in the
 | |
|  * source to SM1-style samples in the bytecode. */
 | |
| static bool lower_separate_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_var *var, *resource, *sampler;
 | |
|     struct hlsl_ir_resource_load *load;
 | |
|     struct vkd3d_string_buffer *name;
 | |
|     struct hlsl_type *sampler_type;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_RESOURCE_LOAD)
 | |
|         return false;
 | |
|     load = hlsl_ir_resource_load(instr);
 | |
| 
 | |
|     if (load->load_type != HLSL_RESOURCE_SAMPLE
 | |
|             && load->load_type != HLSL_RESOURCE_SAMPLE_GRAD
 | |
|             && load->load_type != HLSL_RESOURCE_SAMPLE_LOD
 | |
|             && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS)
 | |
|         return false;
 | |
| 
 | |
|     if (!load->sampler.var)
 | |
|         return false;
 | |
|     resource = load->resource.var;
 | |
|     sampler = load->sampler.var;
 | |
| 
 | |
|     VKD3D_ASSERT(hlsl_type_is_resource(resource->data_type));
 | |
|     VKD3D_ASSERT(hlsl_type_is_resource(sampler->data_type));
 | |
|     if (sampler->data_type->class == HLSL_CLASS_ARRAY && !deref_offset_is_zero(ctx, &load->sampler))
 | |
|     {
 | |
|         /* Not supported by d3dcompiler. */
 | |
|         hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_IMPLEMENTED,
 | |
|                 "Lower separated samples with sampler arrays.");
 | |
|         return false;
 | |
|     }
 | |
|     if (!resource->is_uniform)
 | |
|         return false;
 | |
|     if(!sampler->is_uniform)
 | |
|         return false;
 | |
| 
 | |
|     if (!(name = hlsl_get_string_buffer(ctx)))
 | |
|         return false;
 | |
|     vkd3d_string_buffer_printf(name, "%s+%s", sampler->name, resource->name);
 | |
| 
 | |
|     if (load->texel_offset.node)
 | |
|     {
 | |
|         hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
 | |
|                 "Texel offsets are not supported on profiles lower than 4.0.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     TRACE("Lowering to combined sampler %s.\n", debugstr_a(name->buffer));
 | |
| 
 | |
|     if (!(var = hlsl_get_var(ctx->globals, name->buffer)))
 | |
|     {
 | |
|         if (!(sampler_type = clone_texture_array_as_combined_sampler_array(ctx, resource->data_type)))
 | |
|         {
 | |
|             hlsl_release_string_buffer(ctx, name);
 | |
|             return false;
 | |
|         }
 | |
| 
 | |
|         if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, sampler_type, &instr->loc, false)))
 | |
|         {
 | |
|             hlsl_release_string_buffer(ctx, name);
 | |
|             return false;
 | |
|         }
 | |
|         var->storage_modifiers |= HLSL_STORAGE_UNIFORM;
 | |
|         var->is_combined_sampler = true;
 | |
|         var->is_uniform = 1;
 | |
| 
 | |
|         list_remove(&var->scope_entry);
 | |
|         list_add_after(&sampler->scope_entry, &var->scope_entry);
 | |
| 
 | |
|         list_add_after(&sampler->extern_entry, &var->extern_entry);
 | |
|     }
 | |
|     hlsl_release_string_buffer(ctx, name);
 | |
| 
 | |
|     /* Only change the deref's var, keep the path. */
 | |
|     load->resource.var = var;
 | |
|     hlsl_cleanup_deref(&load->sampler);
 | |
|     load->sampler.var = NULL;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower combined samples and sampler variables to synthesized separated textures and samplers.
 | |
|  * That is, translate SM1-style samples in the source to SM4-style samples in the bytecode. */
 | |
| static bool lower_combined_samples(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_resource_load *load;
 | |
|     struct vkd3d_string_buffer *name;
 | |
|     struct hlsl_ir_var *var;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_RESOURCE_LOAD)
 | |
|         return false;
 | |
|     load = hlsl_ir_resource_load(instr);
 | |
| 
 | |
|     switch (load->load_type)
 | |
|     {
 | |
|         case HLSL_RESOURCE_LOAD:
 | |
|         case HLSL_RESOURCE_GATHER_RED:
 | |
|         case HLSL_RESOURCE_GATHER_GREEN:
 | |
|         case HLSL_RESOURCE_GATHER_BLUE:
 | |
|         case HLSL_RESOURCE_GATHER_ALPHA:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_RED:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_GREEN:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_BLUE:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_ALPHA:
 | |
|         case HLSL_RESOURCE_RESINFO:
 | |
|         case HLSL_RESOURCE_SAMPLE_CMP:
 | |
|         case HLSL_RESOURCE_SAMPLE_CMP_LZ:
 | |
|         case HLSL_RESOURCE_SAMPLE_INFO:
 | |
|             return false;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE:
 | |
|         case HLSL_RESOURCE_SAMPLE_GRAD:
 | |
|         case HLSL_RESOURCE_SAMPLE_LOD:
 | |
|         case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
 | |
|         case HLSL_RESOURCE_SAMPLE_PROJ:
 | |
|             break;
 | |
|     }
 | |
|     if (load->sampler.var)
 | |
|         return false;
 | |
| 
 | |
|     if (!hlsl_type_is_resource(load->resource.var->data_type))
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Lower combined samplers within structs.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     VKD3D_ASSERT(hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_SAMPLERS);
 | |
| 
 | |
|     if (!(name = hlsl_get_string_buffer(ctx)))
 | |
|         return false;
 | |
|     vkd3d_string_buffer_printf(name, "<resource>%s", load->resource.var->name);
 | |
| 
 | |
|     TRACE("Lowering to separate resource %s.\n", debugstr_a(name->buffer));
 | |
| 
 | |
|     if (!(var = hlsl_get_var(ctx->globals, name->buffer)))
 | |
|     {
 | |
|         struct hlsl_type *texture_array_type = hlsl_new_texture_type(ctx, load->sampling_dim,
 | |
|                 hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), 0);
 | |
| 
 | |
|         /* Create (possibly multi-dimensional) texture array type with the same dims as the sampler array. */
 | |
|         struct hlsl_type *arr_type = load->resource.var->data_type;
 | |
|         for (i = 0; i < load->resource.path_len; ++i)
 | |
|         {
 | |
|             VKD3D_ASSERT(arr_type->class == HLSL_CLASS_ARRAY);
 | |
|             texture_array_type = hlsl_new_array_type(ctx, texture_array_type,
 | |
|                     arr_type->e.array.elements_count, HLSL_ARRAY_GENERIC);
 | |
|             arr_type = arr_type->e.array.type;
 | |
|         }
 | |
| 
 | |
|         if (!(var = hlsl_new_synthetic_var_named(ctx, name->buffer, texture_array_type, &instr->loc, false)))
 | |
|         {
 | |
|             hlsl_release_string_buffer(ctx, name);
 | |
|             return false;
 | |
|         }
 | |
|         var->is_uniform = 1;
 | |
|         var->is_separated_resource = true;
 | |
| 
 | |
|         list_add_tail(&ctx->extern_vars, &var->extern_entry);
 | |
|     }
 | |
|     hlsl_release_string_buffer(ctx, name);
 | |
| 
 | |
|     if (load->sampling_dim != var->data_type->sampler_dim)
 | |
|     {
 | |
|         hlsl_error(ctx, &load->node.loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER,
 | |
|                 "Cannot split combined samplers from \"%s\" if they have different usage dimensions.",
 | |
|                 load->resource.var->name);
 | |
|         hlsl_note(ctx, &var->loc, VKD3D_SHADER_LOG_ERROR, "First use as combined sampler is here.");
 | |
|         return false;
 | |
| 
 | |
|     }
 | |
| 
 | |
|     hlsl_copy_deref(ctx, &load->sampler, &load->resource);
 | |
|     load->resource.var = var;
 | |
|     VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->resource)->class == HLSL_CLASS_TEXTURE);
 | |
|     VKD3D_ASSERT(hlsl_deref_get_type(ctx, &load->sampler)->class == HLSL_CLASS_SAMPLER);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void insert_ensuring_decreasing_bind_count(struct list *list, struct hlsl_ir_var *to_add,
 | |
|         enum hlsl_regset regset)
 | |
| {
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, list, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->bind_count[regset] < to_add->bind_count[regset])
 | |
|         {
 | |
|             list_add_before(&var->extern_entry, &to_add->extern_entry);
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     list_add_tail(list, &to_add->extern_entry);
 | |
| }
 | |
| 
 | |
| static bool sort_synthetic_combined_samplers_first(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct list separated_resources;
 | |
|     struct hlsl_ir_var *var, *next;
 | |
| 
 | |
|     list_init(&separated_resources);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->is_combined_sampler)
 | |
|         {
 | |
|             list_remove(&var->extern_entry);
 | |
|             insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_SAMPLERS);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     list_move_head(&ctx->extern_vars, &separated_resources);
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool sort_synthetic_separated_samplers_first(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct list separated_resources;
 | |
|     struct hlsl_ir_var *var, *next;
 | |
| 
 | |
|     list_init(&separated_resources);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->is_separated_resource)
 | |
|         {
 | |
|             list_remove(&var->extern_entry);
 | |
|             insert_ensuring_decreasing_bind_count(&separated_resources, var, HLSL_REGSET_TEXTURES);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     list_move_head(&ctx->extern_vars, &separated_resources);
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| /* Turn CAST to int or uint into TRUNC + REINTERPRET */
 | |
| static bool lower_casts_to_int(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 };
 | |
|     struct hlsl_ir_node *arg, *trunc;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP1_CAST)
 | |
|         return false;
 | |
| 
 | |
|     arg = expr->operands[0].node;
 | |
|     if (!hlsl_type_is_integer(instr->data_type) || instr->data_type->e.numeric.type == HLSL_TYPE_BOOL)
 | |
|         return false;
 | |
|     if (!hlsl_type_is_floating_point(arg->data_type))
 | |
|         return false;
 | |
| 
 | |
|     trunc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_TRUNC, arg, &instr->loc);
 | |
| 
 | |
|     memset(operands, 0, sizeof(operands));
 | |
|     operands[0] = trunc;
 | |
|     hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Turn TRUNC into:
 | |
|  *
 | |
|  *     TRUNC(x) = x - FRACT(x) + extra
 | |
|  *
 | |
|  * where
 | |
|  *
 | |
|  *      extra = FRACT(x) > 0 && x < 0
 | |
|  *
 | |
|  * where the comparisons in the extra term are performed using CMP or SLT
 | |
|  * depending on whether this is a pixel or vertex shader, respectively.
 | |
|  */
 | |
| static bool lower_trunc(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg, *res;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP1_TRUNC)
 | |
|         return false;
 | |
| 
 | |
|     arg = expr->operands[0].node;
 | |
| 
 | |
|     if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|     {
 | |
|         struct hlsl_ir_node *fract, *neg_fract, *has_fract, *floor, *extra, *zero, *one;
 | |
|         struct hlsl_constant_value zero_value, one_value;
 | |
| 
 | |
|         memset(&zero_value, 0, sizeof(zero_value));
 | |
|         if (!(zero = hlsl_new_constant(ctx, arg->data_type, &zero_value, &instr->loc)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, zero);
 | |
| 
 | |
|         one_value.u[0].f = 1.0;
 | |
|         one_value.u[1].f = 1.0;
 | |
|         one_value.u[2].f = 1.0;
 | |
|         one_value.u[3].f = 1.0;
 | |
|         if (!(one = hlsl_new_constant(ctx, arg->data_type, &one_value, &instr->loc)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, one);
 | |
| 
 | |
|         fract = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, arg, &instr->loc);
 | |
|         neg_fract = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, fract, &instr->loc);
 | |
| 
 | |
|         if (!(has_fract = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, neg_fract, zero, one)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, has_fract);
 | |
| 
 | |
|         if (!(extra = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, arg, zero, has_fract)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, extra);
 | |
| 
 | |
|         floor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg, neg_fract);
 | |
|         res = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, floor, extra);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         struct hlsl_ir_node *neg_arg, *is_neg, *fract, *neg_fract, *has_fract, *floor;
 | |
| 
 | |
|         neg_arg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc);
 | |
|         is_neg = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, arg, neg_arg);
 | |
|         fract = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, arg, &instr->loc);
 | |
|         neg_fract = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, fract, &instr->loc);
 | |
|         has_fract = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, neg_fract, fract);
 | |
|         floor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg, neg_fract);
 | |
| 
 | |
|         if (!(res = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, is_neg, has_fract, floor)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, res);
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower modulus using:
 | |
|  *
 | |
|  *     mod(x, y) = x - trunc(x / y) * y;
 | |
|  *
 | |
|  */
 | |
| static bool lower_int_modulus_sm1(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *div, *trunc, *mul, *neg, *operands[2], *ret;
 | |
|     struct hlsl_type *float_type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     bool is_float;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP2_MOD)
 | |
|         return false;
 | |
| 
 | |
|     is_float = instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT
 | |
|             || instr->data_type->e.numeric.type == HLSL_TYPE_HALF;
 | |
|     if (is_float)
 | |
|         return false;
 | |
|     float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
 | |
| 
 | |
|     for (unsigned int i = 0; i < 2; ++i)
 | |
|     {
 | |
|         operands[i] = hlsl_block_add_cast(ctx, block, expr->operands[i].node, float_type, &instr->loc);
 | |
|     }
 | |
| 
 | |
|     div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, operands[0], operands[1]);
 | |
|     trunc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_TRUNC, div, &instr->loc);
 | |
|     mul = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, trunc, operands[1]);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul, &instr->loc);
 | |
|     ret = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, operands[0], neg);
 | |
|     hlsl_block_add_cast(ctx, block, ret, instr->data_type, &instr->loc);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower DIV to RCP + MUL. */
 | |
| static bool lower_division(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *rcp, *ret, *operands[2];
 | |
|     struct hlsl_type *float_type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     bool is_float;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP2_DIV)
 | |
|         return false;
 | |
| 
 | |
|     is_float = instr->data_type->e.numeric.type == HLSL_TYPE_FLOAT
 | |
|             || instr->data_type->e.numeric.type == HLSL_TYPE_HALF;
 | |
|     float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
 | |
| 
 | |
|     for (unsigned int i = 0; i < 2; ++i)
 | |
|     {
 | |
|         operands[i] = expr->operands[i].node;
 | |
|         if (!is_float)
 | |
|             operands[i] = hlsl_block_add_cast(ctx, block, operands[i], float_type, &instr->loc);
 | |
|     }
 | |
| 
 | |
|     rcp = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_RCP, operands[1], &instr->loc);
 | |
|     ret = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, operands[0], rcp);
 | |
|     if (!is_float)
 | |
|         ret = hlsl_block_add_cast(ctx, block, ret, instr->data_type, &instr->loc);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower SQRT to RSQ + RCP. */
 | |
| static bool lower_sqrt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     struct hlsl_ir_node *rsq;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP1_SQRT)
 | |
|         return false;
 | |
| 
 | |
|     rsq = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_RSQ, expr->operands[0].node, &instr->loc);
 | |
|     hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_RCP, rsq, &instr->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower DP2 to MUL + ADD */
 | |
| static bool lower_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg1, *arg2, *mul, *add_x, *add_y;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg1 = expr->operands[0].node;
 | |
|     arg2 = expr->operands[1].node;
 | |
|     if (expr->op != HLSL_OP2_DOT)
 | |
|         return false;
 | |
|     if (arg1->data_type->e.numeric.dimx != 2)
 | |
|         return false;
 | |
| 
 | |
|     if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|     {
 | |
|         struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 };
 | |
| 
 | |
|         operands[0] = arg1;
 | |
|         operands[1] = arg2;
 | |
|         operands[2] = hlsl_block_add_float_constant(ctx, block, 0.0f, &expr->node.loc);
 | |
| 
 | |
|         hlsl_block_add_expr(ctx, block, HLSL_OP3_DP2ADD, operands, instr->data_type, &expr->node.loc);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         mul = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, expr->operands[0].node, expr->operands[1].node);
 | |
| 
 | |
|         add_x = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(X, X, X, X),
 | |
|                 instr->data_type->e.numeric.dimx, mul, &expr->node.loc);
 | |
|         add_y = hlsl_block_add_swizzle(ctx, block, HLSL_SWIZZLE(Y, Y, Y, Y),
 | |
|                 instr->data_type->e.numeric.dimx, mul, &expr->node.loc);
 | |
|         hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, add_x, add_y);
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower ABS to MAX */
 | |
| static bool lower_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg, *neg;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg = expr->operands[0].node;
 | |
|     if (expr->op != HLSL_OP1_ABS)
 | |
|         return false;
 | |
| 
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc);
 | |
|     hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MAX, neg, arg);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower ROUND using FRC, ROUND(x) -> ((x + 0.5) - FRC(x + 0.5)). */
 | |
| static bool lower_round(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg, *neg, *sum, *frc, *half;
 | |
|     struct hlsl_type *type = instr->data_type;
 | |
|     struct hlsl_constant_value half_value;
 | |
|     unsigned int i, component_count;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg = expr->operands[0].node;
 | |
|     if (expr->op != HLSL_OP1_ROUND)
 | |
|         return false;
 | |
| 
 | |
|     component_count = hlsl_type_component_count(type);
 | |
|     for (i = 0; i < component_count; ++i)
 | |
|         half_value.u[i].f = 0.5f;
 | |
|     if (!(half = hlsl_new_constant(ctx, type, &half_value, &expr->node.loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, half);
 | |
| 
 | |
|     sum = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg, half);
 | |
|     frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, sum, &instr->loc);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, frc, &instr->loc);
 | |
|     hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, sum, neg);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower CEIL to FRC */
 | |
| static bool lower_ceil(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg, *neg, *frc;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg = expr->operands[0].node;
 | |
|     if (expr->op != HLSL_OP1_CEIL)
 | |
|         return false;
 | |
| 
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc);
 | |
|     frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, neg, &instr->loc);
 | |
|     hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, frc, arg);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower FLOOR to FRC */
 | |
| static bool lower_floor(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg, *neg, *frc;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg = expr->operands[0].node;
 | |
|     if (expr->op != HLSL_OP1_FLOOR)
 | |
|         return false;
 | |
| 
 | |
|     frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, arg, &instr->loc);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, frc, &instr->loc);
 | |
|     hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, neg, arg);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower SIN/COS to SINCOS for SM1.  */
 | |
| static bool lower_trig(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg, *half, *two_pi, *reciprocal_two_pi, *neg_pi;
 | |
|     struct hlsl_constant_value half_value, two_pi_value, reciprocal_two_pi_value, neg_pi_value;
 | |
|     struct hlsl_ir_node *mad, *frc, *reduced;
 | |
|     struct hlsl_type *type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     enum hlsl_ir_expr_op op;
 | |
|     struct hlsl_ir_node *sincos;
 | |
|     int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
| 
 | |
|     if (expr->op == HLSL_OP1_SIN)
 | |
|         op = HLSL_OP1_SIN_REDUCED;
 | |
|     else if (expr->op == HLSL_OP1_COS)
 | |
|         op = HLSL_OP1_COS_REDUCED;
 | |
|     else
 | |
|         return false;
 | |
| 
 | |
|     arg = expr->operands[0].node;
 | |
|     type = arg->data_type;
 | |
| 
 | |
|     /* Reduce the range of the input angles to [-pi, pi]. */
 | |
|     for (i = 0; i < type->e.numeric.dimx; ++i)
 | |
|     {
 | |
|         half_value.u[i].f = 0.5;
 | |
|         two_pi_value.u[i].f = 2.0 * M_PI;
 | |
|         reciprocal_two_pi_value.u[i].f = 1.0 / (2.0 * M_PI);
 | |
|         neg_pi_value.u[i].f = -M_PI;
 | |
|     }
 | |
| 
 | |
|     if (!(half = hlsl_new_constant(ctx, type, &half_value, &instr->loc))
 | |
|             || !(two_pi = hlsl_new_constant(ctx, type, &two_pi_value, &instr->loc))
 | |
|             || !(reciprocal_two_pi = hlsl_new_constant(ctx, type, &reciprocal_two_pi_value, &instr->loc))
 | |
|             || !(neg_pi = hlsl_new_constant(ctx, type, &neg_pi_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, half);
 | |
|     hlsl_block_add_instr(block, two_pi);
 | |
|     hlsl_block_add_instr(block, reciprocal_two_pi);
 | |
|     hlsl_block_add_instr(block, neg_pi);
 | |
| 
 | |
|     if (!(mad = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, arg, reciprocal_two_pi, half)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, mad);
 | |
|     frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, mad, &instr->loc);
 | |
|     if (!(reduced = hlsl_new_ternary_expr(ctx, HLSL_OP3_MAD, frc, two_pi, neg_pi)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, reduced);
 | |
| 
 | |
|     if (type->e.numeric.dimx == 1)
 | |
|     {
 | |
|         sincos = hlsl_block_add_unary_expr(ctx, block, op, reduced, &instr->loc);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         struct hlsl_ir_node *comps[4] = {0};
 | |
|         struct hlsl_ir_var *var;
 | |
|         struct hlsl_deref var_deref;
 | |
| 
 | |
|         for (i = 0; i < type->e.numeric.dimx; ++i)
 | |
|         {
 | |
|             uint32_t s = hlsl_swizzle_from_writemask(1 << i);
 | |
| 
 | |
|             comps[i] = hlsl_block_add_swizzle(ctx, block, s, 1, reduced, &instr->loc);
 | |
|         }
 | |
| 
 | |
|         if (!(var = hlsl_new_synthetic_var(ctx, "sincos", type, &instr->loc)))
 | |
|             return false;
 | |
|         hlsl_init_simple_deref_from_var(&var_deref, var);
 | |
| 
 | |
|         for (i = 0; i < type->e.numeric.dimx; ++i)
 | |
|         {
 | |
|             sincos = hlsl_block_add_unary_expr(ctx, block, op, comps[i], &instr->loc);
 | |
|             hlsl_block_add_store_component(ctx, block, &var_deref, i, sincos);
 | |
|         }
 | |
| 
 | |
|         hlsl_block_add_load_index(ctx, block, &var_deref, NULL, &instr->loc);
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_logic_not(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg, *arg_cast, *neg, *one, *sub;
 | |
|     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
 | |
|     struct hlsl_constant_value one_value;
 | |
|     struct hlsl_type *float_type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP1_LOGIC_NOT)
 | |
|         return false;
 | |
| 
 | |
|     arg = expr->operands[0].node;
 | |
|     float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, arg->data_type->e.numeric.dimx);
 | |
| 
 | |
|     /* If this is happens, it means we failed to cast the argument to boolean somewhere. */
 | |
|     VKD3D_ASSERT(arg->data_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
| 
 | |
|     arg_cast = hlsl_block_add_cast(ctx, block, arg, float_type, &arg->loc);
 | |
| 
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg_cast, &instr->loc);
 | |
| 
 | |
|     one_value.u[0].f = 1.0;
 | |
|     one_value.u[1].f = 1.0;
 | |
|     one_value.u[2].f = 1.0;
 | |
|     one_value.u[3].f = 1.0;
 | |
|     if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, one);
 | |
| 
 | |
|     sub = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, one, neg);
 | |
| 
 | |
|     memset(operands, 0, sizeof(operands));
 | |
|     operands[0] = sub;
 | |
|     hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Lower TERNARY to CMP for SM1. */
 | |
| static bool lower_ternary(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *cond, *first, *second, *float_cond, *neg;
 | |
|     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = {0};
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     struct hlsl_type *type;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP3_TERNARY)
 | |
|         return false;
 | |
| 
 | |
|     cond = expr->operands[0].node;
 | |
|     first = expr->operands[1].node;
 | |
|     second = expr->operands[2].node;
 | |
| 
 | |
|     if (cond->data_type->class > HLSL_CLASS_VECTOR || instr->data_type->class > HLSL_CLASS_VECTOR)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Lower ternary of type other than scalar or vector.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     VKD3D_ASSERT(cond->data_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
| 
 | |
|     type = hlsl_get_numeric_type(ctx, instr->data_type->class, HLSL_TYPE_FLOAT,
 | |
|             instr->data_type->e.numeric.dimx, instr->data_type->e.numeric.dimy);
 | |
|     float_cond = hlsl_block_add_cast(ctx, block, cond, type, &instr->loc);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, float_cond, &instr->loc);
 | |
| 
 | |
|     memset(operands, 0, sizeof(operands));
 | |
|     operands[0] = neg;
 | |
|     operands[1] = second;
 | |
|     operands[2] = first;
 | |
|     hlsl_block_add_expr(ctx, block, HLSL_OP3_CMP, operands, first->data_type, &instr->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_resource_load_bias(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_node *swizzle, *store;
 | |
|     struct hlsl_ir_resource_load *load;
 | |
|     struct hlsl_ir_load *tmp_load;
 | |
|     struct hlsl_ir_var *tmp_var;
 | |
|     struct hlsl_deref deref;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_RESOURCE_LOAD)
 | |
|         return false;
 | |
|     load = hlsl_ir_resource_load(instr);
 | |
|     if (load->load_type != HLSL_RESOURCE_SAMPLE_LOD
 | |
|             && load->load_type != HLSL_RESOURCE_SAMPLE_LOD_BIAS)
 | |
|         return false;
 | |
| 
 | |
|     if (!load->lod.node)
 | |
|         return false;
 | |
| 
 | |
|     if (!(tmp_var = hlsl_new_synthetic_var(ctx, "coords-with-lod",
 | |
|             hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4), &instr->loc)))
 | |
|         return false;
 | |
| 
 | |
|     if (!(swizzle = hlsl_new_swizzle(ctx, HLSL_SWIZZLE(X, X, X, X), 4, load->lod.node, &load->lod.node->loc)))
 | |
|         return false;
 | |
|     list_add_before(&instr->entry, &swizzle->entry);
 | |
| 
 | |
|     if (!(store = hlsl_new_simple_store(ctx, tmp_var, swizzle)))
 | |
|         return false;
 | |
|     list_add_before(&instr->entry, &store->entry);
 | |
| 
 | |
|     hlsl_init_simple_deref_from_var(&deref, tmp_var);
 | |
|     if (!(store = hlsl_new_store_index(ctx, &deref, NULL, load->coords.node, 0, &instr->loc)))
 | |
|         return false;
 | |
|     list_add_before(&instr->entry, &store->entry);
 | |
| 
 | |
|     if (!(tmp_load = hlsl_new_var_load(ctx, tmp_var, &instr->loc)))
 | |
|         return false;
 | |
|     list_add_before(&instr->entry, &tmp_load->node.entry);
 | |
| 
 | |
|     hlsl_src_remove(&load->coords);
 | |
|     hlsl_src_from_node(&load->coords, &tmp_load->node);
 | |
|     hlsl_src_remove(&load->lod);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_comparison_operators(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr,
 | |
|         struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg1, *arg1_cast, *arg2, *arg2_cast, *slt, *res;
 | |
|     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
 | |
|     struct hlsl_type *float_type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     bool negate = false;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP2_EQUAL && expr->op != HLSL_OP2_NEQUAL && expr->op != HLSL_OP2_LESS
 | |
|             && expr->op != HLSL_OP2_GEQUAL)
 | |
|         return false;
 | |
| 
 | |
|     arg1 = expr->operands[0].node;
 | |
|     arg2 = expr->operands[1].node;
 | |
|     float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
 | |
| 
 | |
|     arg1_cast = hlsl_block_add_cast(ctx, block, arg1, float_type, &instr->loc);
 | |
|     arg2_cast = hlsl_block_add_cast(ctx, block, arg2, float_type, &instr->loc);
 | |
| 
 | |
|     switch (expr->op)
 | |
|     {
 | |
|         case HLSL_OP2_EQUAL:
 | |
|         case HLSL_OP2_NEQUAL:
 | |
|         {
 | |
|             struct hlsl_ir_node *neg, *sub, *abs, *abs_neg;
 | |
| 
 | |
|             neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg2_cast, &instr->loc);
 | |
|             sub = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg1_cast, neg);
 | |
| 
 | |
|             if (ctx->profile->major_version >= 3)
 | |
|             {
 | |
|                 abs = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, sub, &instr->loc);
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 /* Use MUL as a precarious ABS. */
 | |
|                 abs = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, sub, sub);
 | |
|             }
 | |
| 
 | |
|             abs_neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, abs, &instr->loc);
 | |
|             slt = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, abs_neg, abs);
 | |
|             negate = (expr->op == HLSL_OP2_EQUAL);
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         case HLSL_OP2_GEQUAL:
 | |
|         case HLSL_OP2_LESS:
 | |
|         {
 | |
|             slt = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, arg1_cast, arg2_cast);
 | |
|             negate = (expr->op == HLSL_OP2_GEQUAL);
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         default:
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
| 
 | |
|     if (negate)
 | |
|     {
 | |
|         struct hlsl_constant_value one_value;
 | |
|         struct hlsl_ir_node *one, *slt_neg;
 | |
| 
 | |
|         one_value.u[0].f = 1.0;
 | |
|         one_value.u[1].f = 1.0;
 | |
|         one_value.u[2].f = 1.0;
 | |
|         one_value.u[3].f = 1.0;
 | |
|         if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
 | |
|             return false;
 | |
|         hlsl_block_add_instr(block, one);
 | |
| 
 | |
|         slt_neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, slt, &instr->loc);
 | |
|         res = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, one, slt_neg);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         res = slt;
 | |
|     }
 | |
| 
 | |
|     /* We need a REINTERPRET so that the HLSL IR code is valid. SLT and its arguments must be FLOAT,
 | |
|      * and casts to BOOL have already been lowered to "!= 0". */
 | |
|     memset(operands, 0, sizeof(operands));
 | |
|     operands[0] = res;
 | |
|     hlsl_block_add_expr(ctx, block, HLSL_OP1_REINTERPRET, operands, instr->data_type, &instr->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Intended to be used for SM1-SM3, lowers SLT instructions (only available in vertex shaders) to
 | |
|  * CMP instructions (only available in pixel shaders).
 | |
|  * Based on the following equivalence:
 | |
|  *     SLT(x, y)
 | |
|  *     = (x < y) ? 1.0 : 0.0
 | |
|  *     = ((x - y) >= 0) ? 0.0 : 1.0
 | |
|  *     = CMP(x - y, 0.0, 1.0)
 | |
|  */
 | |
| static bool lower_slt(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg1, *arg2, *arg1_cast, *arg2_cast, *neg, *sub, *zero, *one, *cmp;
 | |
|     struct hlsl_constant_value zero_value, one_value;
 | |
|     struct hlsl_type *float_type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP2_SLT)
 | |
|         return false;
 | |
| 
 | |
|     arg1 = expr->operands[0].node;
 | |
|     arg2 = expr->operands[1].node;
 | |
|     float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
 | |
| 
 | |
|     arg1_cast = hlsl_block_add_cast(ctx, block, arg1, float_type, &instr->loc);
 | |
|     arg2_cast = hlsl_block_add_cast(ctx, block, arg2, float_type, &instr->loc);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg2_cast, &instr->loc);
 | |
|     sub = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, arg1_cast, neg);
 | |
| 
 | |
|     memset(&zero_value, 0, sizeof(zero_value));
 | |
|     if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, zero);
 | |
| 
 | |
|     one_value.u[0].f = 1.0;
 | |
|     one_value.u[1].f = 1.0;
 | |
|     one_value.u[2].f = 1.0;
 | |
|     one_value.u[3].f = 1.0;
 | |
|     if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, one);
 | |
| 
 | |
|     if (!(cmp = hlsl_new_ternary_expr(ctx, HLSL_OP3_CMP, sub, zero, one)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, cmp);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Intended to be used for SM1-SM3, lowers CMP instructions (only available in pixel shaders) to
 | |
|  * SLT instructions (only available in vertex shaders).
 | |
|  * Based on the following equivalence:
 | |
|  *     CMP(x, y, z)
 | |
|  *     = (x >= 0) ? y : z
 | |
|  *     = z * ((x < 0) ? 1.0 : 0.0) + y * ((x < 0) ? 0.0 : 1.0)
 | |
|  *     = z * SLT(x, 0.0) + y * (1 - SLT(x, 0.0))
 | |
|  */
 | |
| static bool lower_cmp(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *args[3], *args_cast[3], *slt, *neg_slt, *sub, *zero, *one, *mul1, *mul2;
 | |
|     struct hlsl_constant_value zero_value, one_value;
 | |
|     struct hlsl_type *float_type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP3_CMP)
 | |
|         return false;
 | |
| 
 | |
|     float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, instr->data_type->e.numeric.dimx);
 | |
| 
 | |
|     for (i = 0; i < 3; ++i)
 | |
|     {
 | |
|         args[i] = expr->operands[i].node;
 | |
|         args_cast[i] = hlsl_block_add_cast(ctx, block, args[i], float_type, &instr->loc);
 | |
|     }
 | |
| 
 | |
|     memset(&zero_value, 0, sizeof(zero_value));
 | |
|     if (!(zero = hlsl_new_constant(ctx, float_type, &zero_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, zero);
 | |
| 
 | |
|     one_value.u[0].f = 1.0;
 | |
|     one_value.u[1].f = 1.0;
 | |
|     one_value.u[2].f = 1.0;
 | |
|     one_value.u[3].f = 1.0;
 | |
|     if (!(one = hlsl_new_constant(ctx, float_type, &one_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, one);
 | |
| 
 | |
|     slt = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_SLT, args_cast[0], zero);
 | |
|     mul1 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, args_cast[2], slt);
 | |
|     neg_slt = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, slt, &instr->loc);
 | |
|     sub = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, one, neg_slt);
 | |
|     mul2 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, args_cast[1], sub);
 | |
|     hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_ADD, mul1, mul2);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_casts_to_bool(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_type *type = instr->data_type, *arg_type;
 | |
|     static const struct hlsl_constant_value zero_value;
 | |
|     struct hlsl_ir_node *zero, *neq;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     if (expr->op != HLSL_OP1_CAST)
 | |
|         return false;
 | |
|     arg_type = expr->operands[0].node->data_type;
 | |
|     if (type->class > HLSL_CLASS_VECTOR || arg_type->class > HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
|     if (type->e.numeric.type != HLSL_TYPE_BOOL)
 | |
|         return false;
 | |
| 
 | |
|     /* Narrowing casts should have already been lowered. */
 | |
|     VKD3D_ASSERT(type->e.numeric.dimx == arg_type->e.numeric.dimx);
 | |
| 
 | |
|     zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc);
 | |
|     if (!zero)
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, zero);
 | |
| 
 | |
|     neq = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_NEQUAL, expr->operands[0].node, zero);
 | |
|     neq->data_type = expr->node.data_type;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| struct hlsl_ir_node *hlsl_add_conditional(struct hlsl_ctx *ctx, struct hlsl_block *instrs,
 | |
|         struct hlsl_ir_node *condition, struct hlsl_ir_node *if_true, struct hlsl_ir_node *if_false)
 | |
| {
 | |
|     struct hlsl_type *cond_type = condition->data_type;
 | |
|     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS];
 | |
| 
 | |
|     VKD3D_ASSERT(hlsl_types_are_equal(if_true->data_type, if_false->data_type));
 | |
| 
 | |
|     if (cond_type->e.numeric.type != HLSL_TYPE_BOOL)
 | |
|     {
 | |
|         cond_type = hlsl_get_numeric_type(ctx, cond_type->class, HLSL_TYPE_BOOL,
 | |
|                 cond_type->e.numeric.dimx, cond_type->e.numeric.dimy);
 | |
|         condition = hlsl_block_add_cast(ctx, instrs, condition, cond_type, &condition->loc);
 | |
|     }
 | |
| 
 | |
|     operands[0] = condition;
 | |
|     operands[1] = if_true;
 | |
|     operands[2] = if_false;
 | |
|     return hlsl_block_add_expr(ctx, instrs, HLSL_OP3_TERNARY, operands, if_true->data_type, &condition->loc);
 | |
| }
 | |
| 
 | |
| static bool lower_int_division_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg1, *arg2, *xor, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit;
 | |
|     struct hlsl_type *type = instr->data_type, *utype;
 | |
|     struct hlsl_constant_value high_bit_value;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg1 = expr->operands[0].node;
 | |
|     arg2 = expr->operands[1].node;
 | |
|     if (expr->op != HLSL_OP2_DIV)
 | |
|         return false;
 | |
|     if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
|     if (type->e.numeric.type != HLSL_TYPE_INT)
 | |
|         return false;
 | |
|     utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
 | |
| 
 | |
|     xor = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_BIT_XOR, arg1, arg2);
 | |
| 
 | |
|     for (i = 0; i < type->e.numeric.dimx; ++i)
 | |
|         high_bit_value.u[i].u = 0x80000000;
 | |
|     if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, high_bit);
 | |
| 
 | |
|     and = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_BIT_AND, xor, high_bit);
 | |
|     abs1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, arg1, &instr->loc);
 | |
|     cast1 = hlsl_block_add_cast(ctx, block, abs1, utype, &instr->loc);
 | |
|     abs2 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, arg2, &instr->loc);
 | |
|     cast2 = hlsl_block_add_cast(ctx, block, abs2, utype, &instr->loc);
 | |
|     div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, cast1, cast2);
 | |
|     cast3 = hlsl_block_add_cast(ctx, block, div, type, &instr->loc);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, cast3, &instr->loc);
 | |
|     return hlsl_add_conditional(ctx, block, and, neg, cast3);
 | |
| }
 | |
| 
 | |
| static bool lower_int_modulus_sm4(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg1, *arg2, *and, *abs1, *abs2, *div, *neg, *cast1, *cast2, *cast3, *high_bit;
 | |
|     struct hlsl_type *type = instr->data_type, *utype;
 | |
|     struct hlsl_constant_value high_bit_value;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg1 = expr->operands[0].node;
 | |
|     arg2 = expr->operands[1].node;
 | |
|     if (expr->op != HLSL_OP2_MOD)
 | |
|         return false;
 | |
|     if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
|     if (type->e.numeric.type != HLSL_TYPE_INT)
 | |
|         return false;
 | |
|     utype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_UINT, type->e.numeric.dimx, type->e.numeric.dimy);
 | |
| 
 | |
|     for (i = 0; i < type->e.numeric.dimx; ++i)
 | |
|         high_bit_value.u[i].u = 0x80000000;
 | |
|     if (!(high_bit = hlsl_new_constant(ctx, type, &high_bit_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, high_bit);
 | |
| 
 | |
|     and = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_BIT_AND, arg1, high_bit);
 | |
|     abs1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, arg1, &instr->loc);
 | |
|     cast1 = hlsl_block_add_cast(ctx, block, abs1, utype, &instr->loc);
 | |
|     abs2 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_ABS, arg2, &instr->loc);
 | |
|     cast2 = hlsl_block_add_cast(ctx, block, abs2, utype, &instr->loc);
 | |
|     div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MOD, cast1, cast2);
 | |
|     cast3 = hlsl_block_add_cast(ctx, block, div, type, &instr->loc);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, cast3, &instr->loc);
 | |
|     return hlsl_add_conditional(ctx, block, and, neg, cast3);
 | |
| }
 | |
| 
 | |
| static bool lower_int_abs(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_type *type = instr->data_type;
 | |
|     struct hlsl_ir_node *arg, *neg;
 | |
|     struct hlsl_ir_expr *expr;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
| 
 | |
|     if (expr->op != HLSL_OP1_ABS)
 | |
|         return false;
 | |
|     if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
|     if (type->e.numeric.type != HLSL_TYPE_INT)
 | |
|         return false;
 | |
| 
 | |
|     arg = expr->operands[0].node;
 | |
| 
 | |
|     neg = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg, &instr->loc);
 | |
|     hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MAX, arg, neg);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_int_dot(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg1, *arg2, *mult, *comps[4] = {0}, *res;
 | |
|     struct hlsl_type *type = instr->data_type;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     unsigned int i, dimx;
 | |
|     bool is_bool;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
| 
 | |
|     if (expr->op != HLSL_OP2_DOT)
 | |
|         return false;
 | |
| 
 | |
|     if (hlsl_type_is_integer(type))
 | |
|     {
 | |
|         arg1 = expr->operands[0].node;
 | |
|         arg2 = expr->operands[1].node;
 | |
|         VKD3D_ASSERT(arg1->data_type->e.numeric.dimx == arg2->data_type->e.numeric.dimx);
 | |
|         dimx = arg1->data_type->e.numeric.dimx;
 | |
|         is_bool = type->e.numeric.type == HLSL_TYPE_BOOL;
 | |
| 
 | |
|         mult = hlsl_block_add_binary_expr(ctx, block, is_bool ? HLSL_OP2_LOGIC_AND : HLSL_OP2_MUL, arg1, arg2);
 | |
| 
 | |
|         for (i = 0; i < dimx; ++i)
 | |
|         {
 | |
|             uint32_t s = hlsl_swizzle_from_writemask(1 << i);
 | |
| 
 | |
|             comps[i] = hlsl_block_add_swizzle(ctx, block, s, 1, mult, &instr->loc);
 | |
|         }
 | |
| 
 | |
|         res = comps[0];
 | |
|         for (i = 1; i < dimx; ++i)
 | |
|             res = hlsl_block_add_binary_expr(ctx, block, is_bool ? HLSL_OP2_LOGIC_OR : HLSL_OP2_ADD, res, comps[i]);
 | |
| 
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool lower_float_modulus(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_node *arg1, *arg2, *mul1, *neg1, *ge, *neg2, *div, *mul2, *frc, *cond, *one;
 | |
|     struct hlsl_type *type = instr->data_type, *btype;
 | |
|     struct hlsl_constant_value one_value;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
|     expr = hlsl_ir_expr(instr);
 | |
|     arg1 = expr->operands[0].node;
 | |
|     arg2 = expr->operands[1].node;
 | |
|     if (expr->op != HLSL_OP2_MOD)
 | |
|         return false;
 | |
|     if (type->class != HLSL_CLASS_SCALAR && type->class != HLSL_CLASS_VECTOR)
 | |
|         return false;
 | |
|     if (type->e.numeric.type != HLSL_TYPE_FLOAT)
 | |
|         return false;
 | |
|     btype = hlsl_get_numeric_type(ctx, type->class, HLSL_TYPE_BOOL, type->e.numeric.dimx, type->e.numeric.dimy);
 | |
| 
 | |
|     mul1 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, arg2, arg1);
 | |
|     neg1 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, mul1, &instr->loc);
 | |
| 
 | |
|     ge = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_GEQUAL, mul1, neg1);
 | |
|     ge->data_type = btype;
 | |
| 
 | |
|     neg2 = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_NEG, arg2, &instr->loc);
 | |
|     cond = hlsl_add_conditional(ctx, block, ge, arg2, neg2);
 | |
| 
 | |
|     for (i = 0; i < type->e.numeric.dimx; ++i)
 | |
|         one_value.u[i].f = 1.0f;
 | |
|     if (!(one = hlsl_new_constant(ctx, type, &one_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, one);
 | |
| 
 | |
|     div = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_DIV, one, cond);
 | |
|     mul2 = hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, div, arg1);
 | |
|     frc = hlsl_block_add_unary_expr(ctx, block, HLSL_OP1_FRACT, mul2, &instr->loc);
 | |
|     hlsl_block_add_binary_expr(ctx, block, HLSL_OP2_MUL, frc, cond);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_discard_neg(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_node *zero, *bool_false, *or, *cmp, *load;
 | |
|     static const struct hlsl_constant_value zero_value;
 | |
|     struct hlsl_type *arg_type, *cmp_type;
 | |
|     struct hlsl_ir_node *operands[HLSL_MAX_OPERANDS] = { 0 };
 | |
|     struct hlsl_ir_jump *jump;
 | |
|     struct hlsl_block block;
 | |
|     unsigned int i, count;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_JUMP)
 | |
|         return false;
 | |
|     jump = hlsl_ir_jump(instr);
 | |
|     if (jump->type != HLSL_IR_JUMP_DISCARD_NEG)
 | |
|         return false;
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
| 
 | |
|     arg_type = jump->condition.node->data_type;
 | |
|     if (!(zero = hlsl_new_constant(ctx, arg_type, &zero_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(&block, zero);
 | |
| 
 | |
|     operands[0] = jump->condition.node;
 | |
|     operands[1] = zero;
 | |
|     cmp_type = hlsl_get_numeric_type(ctx, arg_type->class, HLSL_TYPE_BOOL,
 | |
|             arg_type->e.numeric.dimx, arg_type->e.numeric.dimy);
 | |
|     cmp = hlsl_block_add_expr(ctx, &block, HLSL_OP2_LESS, operands, cmp_type, &instr->loc);
 | |
| 
 | |
|     if (!(bool_false = hlsl_new_constant(ctx, hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &zero_value, &instr->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(&block, bool_false);
 | |
| 
 | |
|     or = bool_false;
 | |
| 
 | |
|     count = hlsl_type_component_count(cmp_type);
 | |
|     for (i = 0; i < count; ++i)
 | |
|     {
 | |
|         load = hlsl_add_load_component(ctx, &block, cmp, i, &instr->loc);
 | |
|         or = hlsl_block_add_binary_expr(ctx, &block, HLSL_OP2_LOGIC_OR, or, load);
 | |
|     }
 | |
| 
 | |
|     list_move_tail(&instr->entry, &block.instrs);
 | |
|     hlsl_src_remove(&jump->condition);
 | |
|     hlsl_src_from_node(&jump->condition, or);
 | |
|     jump->type = HLSL_IR_JUMP_DISCARD_NZ;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_discard_nz(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_node *cond, *cond_cast, *abs, *neg;
 | |
|     struct hlsl_type *float_type;
 | |
|     struct hlsl_ir_jump *jump;
 | |
|     struct hlsl_block block;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_JUMP)
 | |
|         return false;
 | |
|     jump = hlsl_ir_jump(instr);
 | |
|     if (jump->type != HLSL_IR_JUMP_DISCARD_NZ)
 | |
|         return false;
 | |
| 
 | |
|     cond = jump->condition.node;
 | |
|     float_type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, cond->data_type->e.numeric.dimx);
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
| 
 | |
|     cond_cast = hlsl_block_add_cast(ctx, &block, cond, float_type, &instr->loc);
 | |
|     abs = hlsl_block_add_unary_expr(ctx, &block, HLSL_OP1_ABS, cond_cast, &instr->loc);
 | |
|     neg = hlsl_block_add_unary_expr(ctx, &block, HLSL_OP1_NEG, abs, &instr->loc);
 | |
| 
 | |
|     list_move_tail(&instr->entry, &block.instrs);
 | |
|     hlsl_src_remove(&jump->condition);
 | |
|     hlsl_src_from_node(&jump->condition, neg);
 | |
|     jump->type = HLSL_IR_JUMP_DISCARD_NEG;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool dce(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     switch (instr->type)
 | |
|     {
 | |
|         case HLSL_IR_CONSTANT:
 | |
|         case HLSL_IR_COMPILE:
 | |
|         case HLSL_IR_EXPR:
 | |
|         case HLSL_IR_INDEX:
 | |
|         case HLSL_IR_LOAD:
 | |
|         case HLSL_IR_RESOURCE_LOAD:
 | |
|         case HLSL_IR_STRING_CONSTANT:
 | |
|         case HLSL_IR_SWIZZLE:
 | |
|         case HLSL_IR_SAMPLER_STATE:
 | |
|             if (list_empty(&instr->uses))
 | |
|             {
 | |
|                 list_remove(&instr->entry);
 | |
|                 hlsl_free_instr(instr);
 | |
|                 return true;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_IR_STORE:
 | |
|         {
 | |
|             struct hlsl_ir_store *store = hlsl_ir_store(instr);
 | |
|             struct hlsl_ir_var *var = store->lhs.var;
 | |
| 
 | |
|             if (var->is_output_semantic)
 | |
|                 break;
 | |
| 
 | |
|             if (var->last_read < instr->index)
 | |
|             {
 | |
|                 list_remove(&instr->entry);
 | |
|                 hlsl_free_instr(instr);
 | |
|                 return true;
 | |
|             }
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         case HLSL_IR_CALL:
 | |
|         case HLSL_IR_IF:
 | |
|         case HLSL_IR_INTERLOCKED:
 | |
|         case HLSL_IR_JUMP:
 | |
|         case HLSL_IR_LOOP:
 | |
|         case HLSL_IR_RESOURCE_STORE:
 | |
|         case HLSL_IR_SWITCH:
 | |
|         case HLSL_IR_SYNC:
 | |
|             break;
 | |
|         case HLSL_IR_STATEBLOCK_CONSTANT:
 | |
|             /* Stateblock constants should not appear in the shader program. */
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void dump_function(struct rb_entry *entry, void *context)
 | |
| {
 | |
|     struct hlsl_ir_function *func = RB_ENTRY_VALUE(entry, struct hlsl_ir_function, entry);
 | |
|     struct hlsl_ir_function_decl *decl;
 | |
|     struct hlsl_ctx *ctx = context;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(decl, &func->overloads, struct hlsl_ir_function_decl, entry)
 | |
|     {
 | |
|         if (decl->has_body)
 | |
|             hlsl_dump_function(ctx, decl);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool mark_indexable_var(struct hlsl_ctx *ctx, struct hlsl_deref *deref,
 | |
|         struct hlsl_ir_node *instr)
 | |
| {
 | |
|     if (!deref->rel_offset.node)
 | |
|         return false;
 | |
| 
 | |
|     VKD3D_ASSERT(deref->var);
 | |
|     VKD3D_ASSERT(deref->rel_offset.node->type != HLSL_IR_CONSTANT);
 | |
|     deref->var->indexable = true;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void mark_indexable_vars(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     struct hlsl_scope *scope;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
 | |
|     {
 | |
|         LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
 | |
|             var->indexable = false;
 | |
|     }
 | |
| 
 | |
|     transform_derefs(ctx, mark_indexable_var, &entry_func->body);
 | |
| }
 | |
| 
 | |
| static char get_regset_name(enum hlsl_regset regset)
 | |
| {
 | |
|     switch (regset)
 | |
|     {
 | |
|         case HLSL_REGSET_SAMPLERS:
 | |
|             return 's';
 | |
|         case HLSL_REGSET_TEXTURES:
 | |
|             return 't';
 | |
|         case HLSL_REGSET_UAVS:
 | |
|             return 'u';
 | |
|         case HLSL_REGSET_STREAM_OUTPUTS:
 | |
|             return 'm';
 | |
|         case HLSL_REGSET_NUMERIC:
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static void allocate_register_reservations(struct hlsl_ctx *ctx, struct list *extern_vars)
 | |
| {
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         const struct hlsl_reg_reservation *reservation = &var->reg_reservation;
 | |
|         unsigned int r;
 | |
| 
 | |
|         if (reservation->reg_type)
 | |
|         {
 | |
|             for (r = 0; r <= HLSL_REGSET_LAST_OBJECT; ++r)
 | |
|             {
 | |
|                 if (var->regs[r].allocation_size > 0)
 | |
|                 {
 | |
|                     if (reservation->reg_type != get_regset_name(r))
 | |
|                     {
 | |
|                         struct vkd3d_string_buffer *type_string;
 | |
| 
 | |
|                         /* We can throw this error because resources can only span across a single
 | |
|                          * regset, but we have to check for multiple regsets if we support register
 | |
|                          * reservations for structs for SM5. */
 | |
|                         type_string = hlsl_type_to_string(ctx, var->data_type);
 | |
|                         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                                 "Object of type '%s' must be bound to register type '%c'.",
 | |
|                                 type_string->buffer, get_regset_name(r));
 | |
|                         hlsl_release_string_buffer(ctx, type_string);
 | |
|                     }
 | |
|                     else
 | |
|                     {
 | |
|                         var->regs[r].allocated = true;
 | |
|                         var->regs[r].space = reservation->reg_space;
 | |
|                         var->regs[r].index = reservation->reg_index;
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void deref_mark_last_read(struct hlsl_deref *deref, unsigned int last_read)
 | |
| {
 | |
|     unsigned int i;
 | |
| 
 | |
|     if (hlsl_deref_is_lowered(deref))
 | |
|     {
 | |
|         if (deref->rel_offset.node)
 | |
|             deref->rel_offset.node->last_read = last_read;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         for (i = 0; i < deref->path_len; ++i)
 | |
|             deref->path[i].node->last_read = last_read;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Compute the earliest and latest liveness for each variable. In the case that
 | |
|  * a variable is accessed inside of a loop, we promote its liveness to extend
 | |
|  * to at least the range of the entire loop. We also do this for nodes, so that
 | |
|  * nodes produced before the loop have their temp register protected from being
 | |
|  * overridden after the last read within an iteration. */
 | |
| static void compute_liveness_recurse(struct hlsl_block *block, unsigned int loop_first, unsigned int loop_last)
 | |
| {
 | |
|     struct hlsl_ir_node *instr;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         const unsigned int last_read = loop_last ? max(instr->index, loop_last) : instr->index;
 | |
| 
 | |
|         switch (instr->type)
 | |
|         {
 | |
|         case HLSL_IR_CALL:
 | |
|             /* We should have inlined all calls before computing liveness. */
 | |
|             vkd3d_unreachable();
 | |
|         case HLSL_IR_STATEBLOCK_CONSTANT:
 | |
|             /* Stateblock constants should not appear in the shader program. */
 | |
|             vkd3d_unreachable();
 | |
| 
 | |
|         case HLSL_IR_STORE:
 | |
|         {
 | |
|             struct hlsl_ir_store *store = hlsl_ir_store(instr);
 | |
| 
 | |
|             var = store->lhs.var;
 | |
|             if (!var->first_write)
 | |
|                 var->first_write = loop_first ? min(instr->index, loop_first) : instr->index;
 | |
|             store->rhs.node->last_read = last_read;
 | |
|             deref_mark_last_read(&store->lhs, last_read);
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_EXPR:
 | |
|         {
 | |
|             struct hlsl_ir_expr *expr = hlsl_ir_expr(instr);
 | |
|             unsigned int i;
 | |
| 
 | |
|             for (i = 0; i < ARRAY_SIZE(expr->operands) && expr->operands[i].node; ++i)
 | |
|                 expr->operands[i].node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_IF:
 | |
|         {
 | |
|             struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
| 
 | |
|             compute_liveness_recurse(&iff->then_block, loop_first, loop_last);
 | |
|             compute_liveness_recurse(&iff->else_block, loop_first, loop_last);
 | |
|             iff->condition.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_LOAD:
 | |
|         {
 | |
|             struct hlsl_ir_load *load = hlsl_ir_load(instr);
 | |
| 
 | |
|             var = load->src.var;
 | |
|             var->last_read = max(var->last_read, last_read);
 | |
|             deref_mark_last_read(&load->src, last_read);
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_LOOP:
 | |
|         {
 | |
|             struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
 | |
| 
 | |
|             compute_liveness_recurse(&loop->body, loop_first ? loop_first : instr->index,
 | |
|                     loop_last ? loop_last : loop->next_index);
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_RESOURCE_LOAD:
 | |
|         {
 | |
|             struct hlsl_ir_resource_load *load = hlsl_ir_resource_load(instr);
 | |
| 
 | |
|             var = load->resource.var;
 | |
|             var->last_read = max(var->last_read, last_read);
 | |
|             deref_mark_last_read(&load->resource, last_read);
 | |
| 
 | |
|             if ((var = load->sampler.var))
 | |
|             {
 | |
|                 var->last_read = max(var->last_read, last_read);
 | |
|                 deref_mark_last_read(&load->sampler, last_read);
 | |
|             }
 | |
| 
 | |
|             if (load->coords.node)
 | |
|                 load->coords.node->last_read = last_read;
 | |
|             if (load->texel_offset.node)
 | |
|                 load->texel_offset.node->last_read = last_read;
 | |
|             if (load->lod.node)
 | |
|                 load->lod.node->last_read = last_read;
 | |
|             if (load->ddx.node)
 | |
|                 load->ddx.node->last_read = last_read;
 | |
|             if (load->ddy.node)
 | |
|                 load->ddy.node->last_read = last_read;
 | |
|             if (load->sample_index.node)
 | |
|                 load->sample_index.node->last_read = last_read;
 | |
|             if (load->cmp.node)
 | |
|                 load->cmp.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_RESOURCE_STORE:
 | |
|         {
 | |
|             struct hlsl_ir_resource_store *store = hlsl_ir_resource_store(instr);
 | |
| 
 | |
|             var = store->resource.var;
 | |
|             var->last_read = max(var->last_read, last_read);
 | |
|             deref_mark_last_read(&store->resource, last_read);
 | |
|             if (store->coords.node)
 | |
|                 store->coords.node->last_read = last_read;
 | |
|             if (store->value.node)
 | |
|                 store->value.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_SWIZZLE:
 | |
|         {
 | |
|             struct hlsl_ir_swizzle *swizzle = hlsl_ir_swizzle(instr);
 | |
| 
 | |
|             swizzle->val.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_INDEX:
 | |
|         {
 | |
|             struct hlsl_ir_index *index = hlsl_ir_index(instr);
 | |
| 
 | |
|             index->val.node->last_read = last_read;
 | |
|             index->idx.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_INTERLOCKED:
 | |
|         {
 | |
|             struct hlsl_ir_interlocked *interlocked = hlsl_ir_interlocked(instr);
 | |
| 
 | |
|             var = interlocked->dst.var;
 | |
|             var->last_read = max(var->last_read, last_read);
 | |
|             deref_mark_last_read(&interlocked->dst, last_read);
 | |
|             interlocked->coords.node->last_read = last_read;
 | |
|             interlocked->value.node->last_read = last_read;
 | |
|             if (interlocked->cmp_value.node)
 | |
|                 interlocked->cmp_value.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_JUMP:
 | |
|         {
 | |
|             struct hlsl_ir_jump *jump = hlsl_ir_jump(instr);
 | |
| 
 | |
|             if (jump->condition.node)
 | |
|                 jump->condition.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_SWITCH:
 | |
|         {
 | |
|             struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|             struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|             LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|                 compute_liveness_recurse(&c->body, loop_first, loop_last);
 | |
|             s->selector.node->last_read = last_read;
 | |
|             break;
 | |
|         }
 | |
|         case HLSL_IR_CONSTANT:
 | |
|         case HLSL_IR_STRING_CONSTANT:
 | |
|         case HLSL_IR_SYNC:
 | |
|             break;
 | |
|         case HLSL_IR_COMPILE:
 | |
|         case HLSL_IR_SAMPLER_STATE:
 | |
|             /* These types are skipped as they are only relevant to effects. */
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void compute_liveness(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     struct hlsl_scope *scope;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     if (ctx->result)
 | |
|         return;
 | |
| 
 | |
|     index_instructions(&entry_func->body, 1);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
 | |
|     {
 | |
|         LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
 | |
|             var->first_write = var->last_read = 0;
 | |
|     }
 | |
| 
 | |
|     compute_liveness_recurse(&entry_func->body, 0, 0);
 | |
| }
 | |
| 
 | |
| static void mark_vars_usage(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct hlsl_scope *scope;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
 | |
|     {
 | |
|         LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
 | |
|         {
 | |
|             if (var->last_read)
 | |
|                 var->is_read = true;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| struct register_allocator
 | |
| {
 | |
|     struct allocation
 | |
|     {
 | |
|         uint32_t reg;
 | |
|         unsigned int writemask;
 | |
|         unsigned int first_write, last_read;
 | |
| 
 | |
|         /* Two allocations with different mode can't share the same register. */
 | |
|         int mode;
 | |
|         /* If an allocation is VIP, no new allocations can be made in the
 | |
|          * register unless they are VIP as well. */
 | |
|         bool vip;
 | |
|     } *allocations;
 | |
|     size_t count, capacity;
 | |
| 
 | |
|     /* Indexable temps are allocated separately and always keep their index regardless of their
 | |
|      * lifetime. */
 | |
|     uint32_t indexable_count;
 | |
| 
 | |
|     /* Total number of registers allocated so far. Used to declare sm4 temp count. */
 | |
|     uint32_t reg_count;
 | |
| 
 | |
|     /* Special flag so allocations that can share registers prioritize those
 | |
|      * that will result in smaller writemasks.
 | |
|      * For instance, a single-register allocation would prefer to share a register
 | |
|      * whose .xy components are already allocated (becoming .z) instead of a
 | |
|      * register whose .xyz components are already allocated (becoming .w). */
 | |
|     bool prioritize_smaller_writemasks;
 | |
| };
 | |
| 
 | |
| static unsigned int get_available_writemask(const struct register_allocator *allocator,
 | |
|         unsigned int first_write, unsigned int last_read, uint32_t reg_idx, int mode, bool vip)
 | |
| {
 | |
|     unsigned int writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|     size_t i;
 | |
| 
 | |
|     for (i = 0; i < allocator->count; ++i)
 | |
|     {
 | |
|         const struct allocation *allocation = &allocator->allocations[i];
 | |
| 
 | |
|         /* We do not overlap if first write == last read:
 | |
|          * this is the case where we are allocating the result of that
 | |
|          * expression, e.g. "add r0, r0, r1". */
 | |
| 
 | |
|         if (allocation->reg == reg_idx
 | |
|                 && first_write < allocation->last_read && last_read > allocation->first_write)
 | |
|         {
 | |
|             writemask &= ~allocation->writemask;
 | |
|             if (allocation->mode != mode)
 | |
|                 writemask = 0;
 | |
|             if (allocation->vip && !vip)
 | |
|                 writemask = 0;
 | |
|         }
 | |
| 
 | |
|         if (!writemask)
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     return writemask;
 | |
| }
 | |
| 
 | |
| static void record_allocation(struct hlsl_ctx *ctx, struct register_allocator *allocator, uint32_t reg_idx,
 | |
|         unsigned int writemask, unsigned int first_write, unsigned int last_read, int mode, bool vip)
 | |
| {
 | |
|     struct allocation *allocation;
 | |
| 
 | |
|     if (!hlsl_array_reserve(ctx, (void **)&allocator->allocations, &allocator->capacity,
 | |
|             allocator->count + 1, sizeof(*allocator->allocations)))
 | |
|         return;
 | |
| 
 | |
|     allocation = &allocator->allocations[allocator->count++];
 | |
|     allocation->reg = reg_idx;
 | |
|     allocation->writemask = writemask;
 | |
|     allocation->first_write = first_write;
 | |
|     allocation->last_read = last_read;
 | |
|     allocation->mode = mode;
 | |
|     allocation->vip = vip;
 | |
| 
 | |
|     allocator->reg_count = max(allocator->reg_count, reg_idx + 1);
 | |
| }
 | |
| 
 | |
| /* Allocates a register (or some components of it) within the register allocator.
 | |
|  * 'reg_size' is the number of register components to be reserved.
 | |
|  * 'component_count' is the number of components for the hlsl_reg's
 | |
|  *      writemask, which can be smaller than 'reg_size'. For instance, sm1
 | |
|  *      floats and vectors allocate the whole register even if they are not
 | |
|  *      using all components.
 | |
|  * 'mode' can be provided to avoid allocating on a register that already has an
 | |
|  *      allocation with a different mode.
 | |
|  * 'force_align' can be used so that the allocation always start in '.x'.
 | |
|  * 'vip' can be used so that no new allocations can be made in the given register
 | |
|  *      unless they are 'vip' as well. */
 | |
| static struct hlsl_reg allocate_register(struct hlsl_ctx *ctx, struct register_allocator *allocator,
 | |
|         unsigned int first_write, unsigned int last_read, unsigned int reg_size,
 | |
|         unsigned int component_count, int mode, bool force_align, bool vip)
 | |
| {
 | |
|     struct hlsl_reg ret = {.allocation_size = 1, .allocated = true};
 | |
|     unsigned int required_size = force_align ? 4 : reg_size;
 | |
|     unsigned int pref;
 | |
| 
 | |
|     VKD3D_ASSERT(component_count <= reg_size);
 | |
| 
 | |
|     pref = allocator->prioritize_smaller_writemasks ? 4 : required_size;
 | |
|     for (; pref >= required_size; --pref)
 | |
|     {
 | |
|         for (uint32_t reg_idx = 0; reg_idx < allocator->reg_count; ++reg_idx)
 | |
|         {
 | |
|             unsigned int available_writemask = get_available_writemask(allocator,
 | |
|                     first_write, last_read, reg_idx, mode, vip);
 | |
| 
 | |
|             if (vkd3d_popcount(available_writemask) >= pref)
 | |
|             {
 | |
|                 unsigned int writemask = hlsl_combine_writemasks(available_writemask,
 | |
|                         vkd3d_write_mask_from_component_count(reg_size));
 | |
| 
 | |
|                 ret.id = reg_idx;
 | |
|                 ret.writemask = hlsl_combine_writemasks(writemask,
 | |
|                         vkd3d_write_mask_from_component_count(component_count));
 | |
| 
 | |
|                 record_allocation(ctx, allocator, reg_idx, writemask, first_write, last_read, mode, vip);
 | |
|                 return ret;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     ret.id = allocator->reg_count;
 | |
|     ret.writemask = vkd3d_write_mask_from_component_count(component_count);
 | |
|     record_allocation(ctx, allocator, allocator->reg_count,
 | |
|             vkd3d_write_mask_from_component_count(reg_size), first_write, last_read, mode, vip);
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| /* Allocate a register with writemask, while reserving reg_writemask. */
 | |
| static struct hlsl_reg allocate_register_with_masks(struct hlsl_ctx *ctx,
 | |
|         struct register_allocator *allocator, unsigned int first_write, unsigned int last_read,
 | |
|         uint32_t reg_writemask, uint32_t writemask, int mode, bool vip)
 | |
| {
 | |
|     struct hlsl_reg ret = {0};
 | |
|     uint32_t reg_idx;
 | |
| 
 | |
|     VKD3D_ASSERT((reg_writemask & writemask) == writemask);
 | |
| 
 | |
|     for (reg_idx = 0;; ++reg_idx)
 | |
|     {
 | |
|         if ((get_available_writemask(allocator, first_write, last_read,
 | |
|                 reg_idx, mode, vip) & reg_writemask) == reg_writemask)
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     record_allocation(ctx, allocator, reg_idx, reg_writemask, first_write, last_read, mode, vip);
 | |
| 
 | |
|     ret.id = reg_idx;
 | |
|     ret.allocation_size = 1;
 | |
|     ret.writemask = writemask;
 | |
|     ret.allocated = true;
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static bool is_range_available(const struct register_allocator *allocator, unsigned int first_write,
 | |
|         unsigned int last_read, uint32_t reg_idx, unsigned int reg_size, int mode, bool vip)
 | |
| {
 | |
|     unsigned int last_reg_mask = (1u << (reg_size % 4)) - 1;
 | |
|     unsigned int writemask;
 | |
|     uint32_t i;
 | |
| 
 | |
|     for (i = 0; i < (reg_size / 4); ++i)
 | |
|     {
 | |
|         writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + i, mode, vip);
 | |
|         if (writemask != VKD3DSP_WRITEMASK_ALL)
 | |
|             return false;
 | |
|     }
 | |
|     writemask = get_available_writemask(allocator, first_write, last_read, reg_idx + (reg_size / 4), mode, vip);
 | |
|     if ((writemask & last_reg_mask) != last_reg_mask)
 | |
|         return false;
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static struct hlsl_reg allocate_range(struct hlsl_ctx *ctx, struct register_allocator *allocator,
 | |
|         unsigned int first_write, unsigned int last_read, unsigned int reg_size, int mode, bool vip)
 | |
| {
 | |
|     struct hlsl_reg ret = {0};
 | |
|     uint32_t reg_idx;
 | |
|     unsigned int i;
 | |
| 
 | |
|     for (reg_idx = 0;; ++reg_idx)
 | |
|     {
 | |
|         if (is_range_available(allocator, first_write, last_read, reg_idx, reg_size, mode, vip))
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < reg_size / 4; ++i)
 | |
|         record_allocation(ctx, allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, first_write, last_read, mode, vip);
 | |
|     if (reg_size % 4)
 | |
|         record_allocation(ctx, allocator, reg_idx + (reg_size / 4),
 | |
|                 (1u << (reg_size % 4)) - 1, first_write, last_read, mode, vip);
 | |
| 
 | |
|     ret.id = reg_idx;
 | |
|     ret.allocation_size = align(reg_size, 4) / 4;
 | |
|     ret.allocated = true;
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static struct hlsl_reg allocate_numeric_registers_for_type(struct hlsl_ctx *ctx, struct register_allocator *allocator,
 | |
|         unsigned int first_write, unsigned int last_read, const struct hlsl_type *type)
 | |
| {
 | |
|     unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
 | |
| 
 | |
|     /* FIXME: We could potentially pack structs or arrays more efficiently... */
 | |
| 
 | |
|     if (type->class <= HLSL_CLASS_VECTOR)
 | |
|         return allocate_register(ctx, allocator, first_write, last_read,
 | |
|                 type->e.numeric.dimx, type->e.numeric.dimx, 0, false, false);
 | |
|     else
 | |
|         return allocate_range(ctx, allocator, first_write, last_read, reg_size, 0, false);
 | |
| }
 | |
| 
 | |
| static const char *debug_register(char class, struct hlsl_reg reg, const struct hlsl_type *type)
 | |
| {
 | |
|     static const char writemask_offset[] = {'w','x','y','z'};
 | |
|     unsigned int reg_size = type->reg_size[HLSL_REGSET_NUMERIC];
 | |
| 
 | |
|     if (reg_size > 4 && !hlsl_type_is_patch_array(type))
 | |
|     {
 | |
|         if (reg_size & 3)
 | |
|             return vkd3d_dbg_sprintf("%c%u-%c%u.%c", class, reg.id, class, reg.id + (reg_size / 4),
 | |
|                     writemask_offset[reg_size & 3]);
 | |
| 
 | |
|         return vkd3d_dbg_sprintf("%c%u-%c%u", class, reg.id, class, reg.id + (reg_size / 4) - 1);
 | |
|     }
 | |
|     return vkd3d_dbg_sprintf("%c%u%s", class, reg.id, debug_hlsl_writemask(reg.writemask));
 | |
| }
 | |
| 
 | |
| static bool track_object_components_sampler_dim(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     struct hlsl_ir_resource_load *load;
 | |
|     struct hlsl_ir_var *var;
 | |
|     enum hlsl_regset regset;
 | |
|     unsigned int index;
 | |
| 
 | |
|     if (instr->type != HLSL_IR_RESOURCE_LOAD)
 | |
|         return false;
 | |
| 
 | |
|     load = hlsl_ir_resource_load(instr);
 | |
|     var = load->resource.var;
 | |
| 
 | |
|     regset = hlsl_deref_get_regset(ctx, &load->resource);
 | |
|     if (!hlsl_regset_index_from_deref(ctx, &load->resource, regset, &index))
 | |
|         return false;
 | |
| 
 | |
|     if (regset == HLSL_REGSET_SAMPLERS)
 | |
|     {
 | |
|         enum hlsl_sampler_dim dim;
 | |
| 
 | |
|         VKD3D_ASSERT(!load->sampler.var);
 | |
| 
 | |
|         dim = var->objects_usage[regset][index].sampler_dim;
 | |
|         if (dim != load->sampling_dim)
 | |
|         {
 | |
|             if (dim == HLSL_SAMPLER_DIM_GENERIC)
 | |
|             {
 | |
|                 var->objects_usage[regset][index].first_sampler_dim_loc = instr->loc;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INCONSISTENT_SAMPLER,
 | |
|                         "Inconsistent generic sampler usage dimension.");
 | |
|                 hlsl_note(ctx, &var->objects_usage[regset][index].first_sampler_dim_loc,
 | |
|                         VKD3D_SHADER_LOG_ERROR, "First use is here.");
 | |
|                 return false;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     var->objects_usage[regset][index].sampler_dim = load->sampling_dim;
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void register_deref_usage(struct hlsl_ctx *ctx, struct hlsl_deref *deref)
 | |
| {
 | |
|     struct hlsl_ir_var *var = deref->var;
 | |
|     enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
 | |
|     uint32_t required_bind_count;
 | |
|     struct hlsl_type *type;
 | |
|     unsigned int index;
 | |
| 
 | |
|     hlsl_regset_index_from_deref(ctx, deref, regset, &index);
 | |
| 
 | |
|     if (regset <= HLSL_REGSET_LAST_OBJECT)
 | |
|     {
 | |
|         var->objects_usage[regset][index].used = true;
 | |
|         var->bind_count[regset] = max(var->bind_count[regset], index + 1);
 | |
|     }
 | |
|     else if (regset == HLSL_REGSET_NUMERIC)
 | |
|     {
 | |
|         type = hlsl_deref_get_type(ctx, deref);
 | |
| 
 | |
|         required_bind_count = align(index + type->reg_size[regset], 4) / 4;
 | |
|         var->bind_count[regset] = max(var->bind_count[regset], required_bind_count);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         vkd3d_unreachable();
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool track_components_usage(struct hlsl_ctx *ctx, struct hlsl_ir_node *instr, void *context)
 | |
| {
 | |
|     switch (instr->type)
 | |
|     {
 | |
|         case HLSL_IR_LOAD:
 | |
|         {
 | |
|             struct hlsl_ir_load *load = hlsl_ir_load(instr);
 | |
| 
 | |
|             if (!load->src.var->is_uniform)
 | |
|                 return false;
 | |
| 
 | |
|             /* These will are handled by validate_static_object_references(). */
 | |
|             if (hlsl_deref_get_regset(ctx, &load->src) != HLSL_REGSET_NUMERIC)
 | |
|                 return false;
 | |
| 
 | |
|             register_deref_usage(ctx, &load->src);
 | |
|             break;
 | |
|         }
 | |
| 
 | |
|         case HLSL_IR_RESOURCE_LOAD:
 | |
|             register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->resource);
 | |
|             if (hlsl_ir_resource_load(instr)->sampler.var)
 | |
|                 register_deref_usage(ctx, &hlsl_ir_resource_load(instr)->sampler);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_IR_RESOURCE_STORE:
 | |
|             register_deref_usage(ctx, &hlsl_ir_resource_store(instr)->resource);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_IR_INTERLOCKED:
 | |
|             register_deref_usage(ctx, &hlsl_ir_interlocked(instr)->dst);
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void calculate_resource_register_counts(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct hlsl_ir_var *var;
 | |
|     struct hlsl_type *type;
 | |
|     unsigned int k;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         type = var->data_type;
 | |
| 
 | |
|         for (k = 0; k <= HLSL_REGSET_LAST_OBJECT; ++k)
 | |
|         {
 | |
|             bool is_separated = var->is_separated_resource;
 | |
| 
 | |
|             if (var->bind_count[k] > 0)
 | |
|                 var->regs[k].allocation_size = (k == HLSL_REGSET_SAMPLERS || is_separated) ? var->bind_count[k] : type->reg_size[k];
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void allocate_instr_temp_register(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_node *instr, struct register_allocator *allocator)
 | |
| {
 | |
|     unsigned int reg_writemask = 0, dst_writemask = 0;
 | |
| 
 | |
|     if (instr->reg.allocated || !instr->last_read)
 | |
|         return;
 | |
| 
 | |
|     if (instr->type == HLSL_IR_EXPR)
 | |
|     {
 | |
|         switch (hlsl_ir_expr(instr)->op)
 | |
|         {
 | |
|             case HLSL_OP1_COS_REDUCED:
 | |
|                 dst_writemask = VKD3DSP_WRITEMASK_0;
 | |
|                 reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_0;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_OP1_SIN_REDUCED:
 | |
|                 dst_writemask = VKD3DSP_WRITEMASK_1;
 | |
|                 reg_writemask = ctx->profile->major_version < 3 ? (1 << 3) - 1 : VKD3DSP_WRITEMASK_1;
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (reg_writemask)
 | |
|         instr->reg = allocate_register_with_masks(ctx, allocator, instr->index,
 | |
|                 instr->last_read, reg_writemask, dst_writemask, 0, false);
 | |
|     else
 | |
|         instr->reg = allocate_numeric_registers_for_type(ctx, allocator,
 | |
|                 instr->index, instr->last_read, instr->data_type);
 | |
| 
 | |
|     TRACE("Allocated anonymous expression @%u to %s (liveness %u-%u).\n", instr->index,
 | |
|             debug_register('r', instr->reg, instr->data_type), instr->index, instr->last_read);
 | |
| }
 | |
| 
 | |
| static void allocate_variable_temp_register(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_var *var, struct register_allocator *allocator)
 | |
| {
 | |
|     if (var->is_input_semantic || var->is_output_semantic || var->is_uniform)
 | |
|         return;
 | |
| 
 | |
|     if (!var->regs[HLSL_REGSET_NUMERIC].allocated && var->last_read)
 | |
|     {
 | |
|         if (var->indexable)
 | |
|         {
 | |
|             var->regs[HLSL_REGSET_NUMERIC].id = allocator->indexable_count++;
 | |
|             var->regs[HLSL_REGSET_NUMERIC].allocation_size = 1;
 | |
|             var->regs[HLSL_REGSET_NUMERIC].writemask = 0;
 | |
|             var->regs[HLSL_REGSET_NUMERIC].allocated = true;
 | |
| 
 | |
|             TRACE("Allocated %s to x%u[].\n", var->name, var->regs[HLSL_REGSET_NUMERIC].id);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             var->regs[HLSL_REGSET_NUMERIC] = allocate_numeric_registers_for_type(ctx, allocator,
 | |
|                     var->first_write, var->last_read, var->data_type);
 | |
| 
 | |
|             TRACE("Allocated %s to %s (liveness %u-%u).\n", var->name, debug_register('r',
 | |
|                     var->regs[HLSL_REGSET_NUMERIC], var->data_type), var->first_write, var->last_read);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void allocate_temp_registers_recurse(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_block *block, struct register_allocator *allocator)
 | |
| {
 | |
|     struct hlsl_ir_node *instr;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         /* In SM4 all constants are inlined. */
 | |
|         if (ctx->profile->major_version >= 4 && instr->type == HLSL_IR_CONSTANT)
 | |
|             continue;
 | |
| 
 | |
|         allocate_instr_temp_register(ctx, instr, allocator);
 | |
| 
 | |
|         switch (instr->type)
 | |
|         {
 | |
|             case HLSL_IR_IF:
 | |
|             {
 | |
|                 struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
|                 allocate_temp_registers_recurse(ctx, &iff->then_block, allocator);
 | |
|                 allocate_temp_registers_recurse(ctx, &iff->else_block, allocator);
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_LOAD:
 | |
|             {
 | |
|                 struct hlsl_ir_load *load = hlsl_ir_load(instr);
 | |
|                 /* We need to at least allocate a variable for undefs.
 | |
|                  * FIXME: We should probably find a way to remove them instead. */
 | |
|                 allocate_variable_temp_register(ctx, load->src.var, allocator);
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_LOOP:
 | |
|             {
 | |
|                 struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
 | |
|                 allocate_temp_registers_recurse(ctx, &loop->body, allocator);
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_STORE:
 | |
|             {
 | |
|                 struct hlsl_ir_store *store = hlsl_ir_store(instr);
 | |
|                 allocate_variable_temp_register(ctx, store->lhs.var, allocator);
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_SWITCH:
 | |
|             {
 | |
|                 struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|                 struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|                 LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|                 {
 | |
|                     allocate_temp_registers_recurse(ctx, &c->body, allocator);
 | |
|                 }
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool find_constant(struct hlsl_ctx *ctx, const float *f, unsigned int count, struct hlsl_reg *ret)
 | |
| {
 | |
|     struct hlsl_constant_defs *defs = &ctx->constant_defs;
 | |
| 
 | |
|     for (size_t i = 0; i < defs->count; ++i)
 | |
|     {
 | |
|         const struct hlsl_constant_register *reg = &defs->regs[i];
 | |
| 
 | |
|         for (size_t j = 0; j <= 4 - count; ++j)
 | |
|         {
 | |
|             unsigned int writemask = ((1u << count) - 1) << j;
 | |
| 
 | |
|             if ((reg->allocated_mask & writemask) == writemask
 | |
|                     && !memcmp(f, ®->value.f[j], count * sizeof(float)))
 | |
|             {
 | |
|                 ret->id = reg->index;
 | |
|                 ret->allocation_size = 1;
 | |
|                 ret->writemask = writemask;
 | |
|                 ret->allocated = true;
 | |
|                 return true;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void record_constant(struct hlsl_ctx *ctx, unsigned int component_index, float f,
 | |
|         const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     struct hlsl_constant_defs *defs = &ctx->constant_defs;
 | |
|     struct hlsl_constant_register *reg;
 | |
|     size_t i;
 | |
| 
 | |
|     for (i = 0; i < defs->count; ++i)
 | |
|     {
 | |
|         reg = &defs->regs[i];
 | |
|         if (reg->index == (component_index / 4))
 | |
|         {
 | |
|             reg->value.f[component_index % 4] = f;
 | |
|             reg->allocated_mask |= (1u << (component_index % 4));
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (!hlsl_array_reserve(ctx, (void **)&defs->regs, &defs->size, defs->count + 1, sizeof(*defs->regs)))
 | |
|         return;
 | |
|     reg = &defs->regs[defs->count++];
 | |
|     memset(reg, 0, sizeof(*reg));
 | |
|     reg->index = component_index / 4;
 | |
|     reg->value.f[component_index % 4] = f;
 | |
|     reg->allocated_mask = (1u << (component_index % 4));
 | |
|     reg->loc = *loc;
 | |
| }
 | |
| 
 | |
| static void allocate_const_registers_recurse(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_block *block, struct register_allocator *allocator)
 | |
| {
 | |
|     struct hlsl_ir_node *instr;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         switch (instr->type)
 | |
|         {
 | |
|             case HLSL_IR_CONSTANT:
 | |
|             {
 | |
|                 struct hlsl_ir_constant *constant = hlsl_ir_constant(instr);
 | |
|                 const struct hlsl_type *type = instr->data_type;
 | |
|                 float f[4] = {0};
 | |
| 
 | |
|                 VKD3D_ASSERT(hlsl_is_numeric_type(type));
 | |
|                 VKD3D_ASSERT(type->e.numeric.dimy == 1);
 | |
| 
 | |
|                 for (unsigned int i = 0; i < type->e.numeric.dimx; ++i)
 | |
|                 {
 | |
|                     const union hlsl_constant_value_component *value;
 | |
| 
 | |
|                     value = &constant->value.u[i];
 | |
| 
 | |
|                     switch (type->e.numeric.type)
 | |
|                     {
 | |
|                         case HLSL_TYPE_BOOL:
 | |
|                             f[i] = !!value->u;
 | |
|                             break;
 | |
| 
 | |
|                         case HLSL_TYPE_FLOAT:
 | |
|                         case HLSL_TYPE_HALF:
 | |
|                             f[i] = value->f;
 | |
|                             break;
 | |
| 
 | |
|                         case HLSL_TYPE_INT:
 | |
|                             f[i] = value->i;
 | |
|                             break;
 | |
| 
 | |
|                         case HLSL_TYPE_MIN16UINT:
 | |
|                         case HLSL_TYPE_UINT:
 | |
|                             f[i] = value->u;
 | |
|                             break;
 | |
| 
 | |
|                         case HLSL_TYPE_DOUBLE:
 | |
|                             FIXME("Double constant.\n");
 | |
|                             return;
 | |
|                     }
 | |
|                 }
 | |
| 
 | |
|                 if (find_constant(ctx, f, type->e.numeric.dimx, &constant->reg))
 | |
|                 {
 | |
|                     TRACE("Reusing already allocated constant %s for @%u.\n",
 | |
|                             debug_register('c', constant->reg, type), instr->index);
 | |
|                     break;
 | |
|                 }
 | |
| 
 | |
|                 constant->reg = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
 | |
|                 TRACE("Allocated constant @%u to %s.\n", instr->index, debug_register('c', constant->reg, type));
 | |
| 
 | |
|                 for (unsigned int x = 0, i = 0; x < 4; ++x)
 | |
|                 {
 | |
|                     if ((constant->reg.writemask & (1u << x)))
 | |
|                         record_constant(ctx, constant->reg.id * 4 + x, f[i++], &constant->node.loc);
 | |
|                 }
 | |
| 
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_IF:
 | |
|             {
 | |
|                 struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
|                 allocate_const_registers_recurse(ctx, &iff->then_block, allocator);
 | |
|                 allocate_const_registers_recurse(ctx, &iff->else_block, allocator);
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_LOOP:
 | |
|             {
 | |
|                 struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
 | |
|                 allocate_const_registers_recurse(ctx, &loop->body, allocator);
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             case HLSL_IR_SWITCH:
 | |
|             {
 | |
|                 struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|                 struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|                 LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|                 {
 | |
|                     allocate_const_registers_recurse(ctx, &c->body, allocator);
 | |
|                 }
 | |
|                 break;
 | |
|             }
 | |
| 
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sort_uniform_by_bind_count(struct list *sorted, struct hlsl_ir_var *to_sort, enum hlsl_regset regset)
 | |
| {
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     list_remove(&to_sort->extern_entry);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         uint32_t to_sort_size = to_sort->bind_count[regset];
 | |
|         uint32_t var_size = var->bind_count[regset];
 | |
| 
 | |
|         if (to_sort_size > var_size)
 | |
|         {
 | |
|             list_add_before(&var->extern_entry, &to_sort->extern_entry);
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     list_add_tail(sorted, &to_sort->extern_entry);
 | |
| }
 | |
| 
 | |
| static void sort_uniforms_by_bind_count(struct hlsl_ctx *ctx, enum hlsl_regset regset)
 | |
| {
 | |
|     struct list sorted = LIST_INIT(sorted);
 | |
|     struct hlsl_ir_var *var, *next;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->is_uniform)
 | |
|             sort_uniform_by_bind_count(&sorted, var, regset);
 | |
|     }
 | |
|     list_move_tail(&ctx->extern_vars, &sorted);
 | |
| }
 | |
| 
 | |
| /* In SM2, 'sincos' expects specific constants as src1 and src2 arguments.
 | |
|  * These have to be referenced directly, i.e. as 'c' not 'r'. */
 | |
| static void allocate_sincos_const_registers(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct register_allocator *allocator)
 | |
| {
 | |
|     const struct hlsl_ir_node *instr;
 | |
|     struct hlsl_type *type;
 | |
| 
 | |
|     if (ctx->profile->major_version >= 3)
 | |
|         return;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(instr, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->type == HLSL_IR_EXPR && (hlsl_ir_expr(instr)->op == HLSL_OP1_SIN_REDUCED
 | |
|                 || hlsl_ir_expr(instr)->op == HLSL_OP1_COS_REDUCED))
 | |
|         {
 | |
|             type = hlsl_get_vector_type(ctx, HLSL_TYPE_FLOAT, 4);
 | |
| 
 | |
|             ctx->d3dsincosconst1 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
 | |
|             TRACE("Allocated D3DSINCOSCONST1 to %s.\n", debug_register('c', ctx->d3dsincosconst1, type));
 | |
|             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 0, -1.55009923e-06f, &instr->loc);
 | |
|             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 1, -2.17013894e-05f, &instr->loc);
 | |
|             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 2,  2.60416674e-03f, &instr->loc);
 | |
|             record_constant(ctx, ctx->d3dsincosconst1.id * 4 + 3,  2.60416680e-04f, &instr->loc);
 | |
| 
 | |
|             ctx->d3dsincosconst2 = allocate_numeric_registers_for_type(ctx, allocator, 1, UINT_MAX, type);
 | |
|             TRACE("Allocated D3DSINCOSCONST2 to %s.\n", debug_register('c', ctx->d3dsincosconst2, type));
 | |
|             record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 0, -2.08333340e-02f, &instr->loc);
 | |
|             record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 1, -1.25000000e-01f, &instr->loc);
 | |
|             record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 2,  1.00000000e+00f, &instr->loc);
 | |
|             record_constant(ctx, ctx->d3dsincosconst2.id * 4 + 3,  5.00000000e-01f, &instr->loc);
 | |
| 
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void allocate_const_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     struct register_allocator allocator_used = {0};
 | |
|     struct register_allocator allocator = {0};
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     sort_uniforms_by_bind_count(ctx, HLSL_REGSET_NUMERIC);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
 | |
|         unsigned int bind_count = var->bind_count[HLSL_REGSET_NUMERIC];
 | |
| 
 | |
|         if (!var->is_uniform || reg_size == 0)
 | |
|             continue;
 | |
| 
 | |
|         if (var->reg_reservation.reg_type == 'c')
 | |
|         {
 | |
|             unsigned int reg_idx = var->reg_reservation.reg_index;
 | |
|             unsigned int i;
 | |
| 
 | |
|             VKD3D_ASSERT(reg_size % 4 == 0);
 | |
|             for (i = 0; i < reg_size / 4; ++i)
 | |
|             {
 | |
|                 if (i < bind_count)
 | |
|                 {
 | |
|                     if (get_available_writemask(&allocator_used, 1, UINT_MAX,
 | |
|                             reg_idx + i, 0, false) != VKD3DSP_WRITEMASK_ALL)
 | |
|                     {
 | |
|                         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                                 "Overlapping register() reservations on 'c%u'.", reg_idx + i);
 | |
|                     }
 | |
|                     record_allocation(ctx, &allocator_used, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false);
 | |
|                 }
 | |
|                 record_allocation(ctx, &allocator, reg_idx + i, VKD3DSP_WRITEMASK_ALL, 1, UINT_MAX, 0, false);
 | |
|             }
 | |
| 
 | |
|             var->regs[HLSL_REGSET_NUMERIC].id = reg_idx;
 | |
|             var->regs[HLSL_REGSET_NUMERIC].allocation_size = reg_size / 4;
 | |
|             var->regs[HLSL_REGSET_NUMERIC].writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|             var->regs[HLSL_REGSET_NUMERIC].allocated = true;
 | |
|             TRACE("Allocated reserved %s to %s.\n", var->name,
 | |
|                     debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     vkd3d_free(allocator_used.allocations);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         unsigned int alloc_size = 4 * var->bind_count[HLSL_REGSET_NUMERIC];
 | |
| 
 | |
|         if (!var->is_uniform || alloc_size == 0)
 | |
|             continue;
 | |
| 
 | |
|         if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
 | |
|         {
 | |
|             var->regs[HLSL_REGSET_NUMERIC] = allocate_range(ctx, &allocator, 1, UINT_MAX, alloc_size, 0, false);
 | |
|             TRACE("Allocated %s to %s.\n", var->name,
 | |
|                     debug_register('c', var->regs[HLSL_REGSET_NUMERIC], var->data_type));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     allocate_const_registers_recurse(ctx, &entry_func->body, &allocator);
 | |
| 
 | |
|     allocate_sincos_const_registers(ctx, &entry_func->body, &allocator);
 | |
| 
 | |
|     vkd3d_free(allocator.allocations);
 | |
| }
 | |
| 
 | |
| /* Simple greedy temporary register allocation pass that just assigns a unique
 | |
|  * index to all (simultaneously live) variables or intermediate values. Agnostic
 | |
|  * as to how many registers are actually available for the current backend, and
 | |
|  * does not handle constants. */
 | |
| static uint32_t allocate_temp_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     struct register_allocator allocator = {0};
 | |
|     struct hlsl_scope *scope;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     /* Reset variable temp register allocations. */
 | |
|     LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
 | |
|     {
 | |
|         LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
 | |
|         {
 | |
|             if (!(var->is_input_semantic || var->is_output_semantic || var->is_uniform))
 | |
|                 memset(var->regs, 0, sizeof(var->regs));
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* ps_1_* outputs are special and go in temp register 0. */
 | |
|     if (ctx->profile->major_version == 1 && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|     {
 | |
|         LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|         {
 | |
|             if (var->is_output_semantic)
 | |
|             {
 | |
|                 record_allocation(ctx, &allocator, 0, VKD3DSP_WRITEMASK_ALL,
 | |
|                         var->first_write, UINT_MAX, 0, false);
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     allocate_temp_registers_recurse(ctx, &entry_func->body, &allocator);
 | |
|     vkd3d_free(allocator.allocations);
 | |
| 
 | |
|     if (allocator.indexable_count)
 | |
|         TRACE("Declaration of function \"%s\" required %u temp registers, and %u indexable temps.\n",
 | |
|                 entry_func->func->name, allocator.reg_count, allocator.indexable_count);
 | |
|     else
 | |
|         TRACE("Declaration of function \"%s\" required %u temp registers.\n",
 | |
|                 entry_func->func->name, allocator.reg_count);
 | |
| 
 | |
|     return allocator.reg_count;
 | |
| }
 | |
| 
 | |
| static enum vkd3d_shader_interpolation_mode sm4_get_interpolation_mode(struct hlsl_type *type,
 | |
|         unsigned int storage_modifiers)
 | |
| {
 | |
|     unsigned int i;
 | |
| 
 | |
|     static const struct
 | |
|     {
 | |
|         unsigned int modifiers;
 | |
|         enum vkd3d_shader_interpolation_mode mode;
 | |
|     }
 | |
|     modes[] =
 | |
|     {
 | |
|         {HLSL_STORAGE_CENTROID | HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE_CENTROID},
 | |
|         {HLSL_STORAGE_NOPERSPECTIVE, VKD3DSIM_LINEAR_NOPERSPECTIVE},
 | |
|         {HLSL_STORAGE_CENTROID, VKD3DSIM_LINEAR_CENTROID},
 | |
|         {HLSL_STORAGE_CENTROID | HLSL_STORAGE_LINEAR, VKD3DSIM_LINEAR_CENTROID},
 | |
|     };
 | |
| 
 | |
|     if (hlsl_type_is_primitive_array(type))
 | |
|         type = type->e.array.type;
 | |
| 
 | |
|     VKD3D_ASSERT(hlsl_is_numeric_type(type));
 | |
| 
 | |
|     if ((storage_modifiers & HLSL_STORAGE_NOINTERPOLATION)
 | |
|             || base_type_get_semantic_equivalent(type->e.numeric.type) == HLSL_TYPE_UINT)
 | |
|         return VKD3DSIM_CONSTANT;
 | |
| 
 | |
|     for (i = 0; i < ARRAY_SIZE(modes); ++i)
 | |
|     {
 | |
|         if ((storage_modifiers & modes[i].modifiers) == modes[i].modifiers)
 | |
|             return modes[i].mode;
 | |
|     }
 | |
| 
 | |
|     return VKD3DSIM_LINEAR;
 | |
| }
 | |
| 
 | |
| static void allocate_semantic_register(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
 | |
|         struct register_allocator *allocator, bool output, bool optimize)
 | |
| {
 | |
|     static const char *const shader_names[] =
 | |
|     {
 | |
|         [VKD3D_SHADER_TYPE_PIXEL] = "Pixel",
 | |
|         [VKD3D_SHADER_TYPE_VERTEX] = "Vertex",
 | |
|         [VKD3D_SHADER_TYPE_GEOMETRY] = "Geometry",
 | |
|         [VKD3D_SHADER_TYPE_HULL] = "Hull",
 | |
|         [VKD3D_SHADER_TYPE_DOMAIN] = "Domain",
 | |
|         [VKD3D_SHADER_TYPE_COMPUTE] = "Compute",
 | |
|     };
 | |
| 
 | |
|     bool is_primitive = hlsl_type_is_primitive_array(var->data_type);
 | |
|     enum vkd3d_shader_register_type type;
 | |
|     struct vkd3d_shader_version version;
 | |
|     bool special_interpolation = false;
 | |
|     bool vip_allocation = false;
 | |
|     uint32_t reg;
 | |
|     bool builtin;
 | |
| 
 | |
|     VKD3D_ASSERT(var->semantic.name);
 | |
| 
 | |
|     version.major = ctx->profile->major_version;
 | |
|     version.minor = ctx->profile->minor_version;
 | |
|     version.type = ctx->profile->type;
 | |
| 
 | |
|     if (version.major < 4)
 | |
|     {
 | |
|         enum vkd3d_decl_usage usage;
 | |
|         uint32_t usage_idx;
 | |
| 
 | |
|         /* ps_1_* outputs are special and go in temp register 0. */
 | |
|         if (version.major == 1 && output && version.type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|             return;
 | |
| 
 | |
|         builtin = sm1_register_from_semantic_name(&version,
 | |
|                 var->semantic.name, var->semantic.index, output, NULL, &type, ®);
 | |
|         if (!builtin && !sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx))
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
 | |
|                     "Invalid semantic '%s'.", var->semantic.name);
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         if ((!output && !var->last_read) || (output && !var->first_write))
 | |
|             return;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         enum vkd3d_shader_sysval_semantic semantic;
 | |
|         bool has_idx;
 | |
| 
 | |
|         if (!sm4_sysval_semantic_from_semantic_name(&semantic, &version, ctx->semantic_compat_mapping, ctx->domain,
 | |
|                 var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive))
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
 | |
|                     "Invalid semantic '%s'.", var->semantic.name);
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         if ((builtin = sm4_register_from_semantic_name(&version, var->semantic.name, output, &type, &has_idx)))
 | |
|             reg = has_idx ? var->semantic.index : 0;
 | |
| 
 | |
|         if (semantic == VKD3D_SHADER_SV_TESS_FACTOR_TRIINT)
 | |
|         {
 | |
|             /* While SV_InsideTessFactor can be declared as 'float' for "tri"
 | |
|              * domains, it is allocated as if it was 'float[1]'. */
 | |
|             var->force_align = true;
 | |
|         }
 | |
| 
 | |
|         if (semantic == VKD3D_SHADER_SV_RENDER_TARGET_ARRAY_INDEX
 | |
|                 || semantic == VKD3D_SHADER_SV_VIEWPORT_ARRAY_INDEX
 | |
|                 || semantic == VKD3D_SHADER_SV_PRIMITIVE_ID)
 | |
|             vip_allocation = true;
 | |
| 
 | |
|         if (semantic == VKD3D_SHADER_SV_IS_FRONT_FACE || semantic == VKD3D_SHADER_SV_SAMPLE_INDEX
 | |
|                 || (version.type == VKD3D_SHADER_TYPE_DOMAIN && !output && !is_primitive)
 | |
|                 || (ctx->is_patch_constant_func && output))
 | |
|             special_interpolation = true;
 | |
|     }
 | |
| 
 | |
|     if (builtin)
 | |
|     {
 | |
|         TRACE("%s %s semantic %s[%u] matches predefined register %#x[%u].\n", shader_names[version.type],
 | |
|                 output ? "output" : "input", var->semantic.name, var->semantic.index, type, reg);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         unsigned int component_count = is_primitive
 | |
|                 ? var->data_type->e.array.type->e.numeric.dimx : var->data_type->e.numeric.dimx;
 | |
|         int mode = (ctx->profile->major_version < 4)
 | |
|                 ? 0 : sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
 | |
|         unsigned int reg_size = optimize ? component_count : 4;
 | |
| 
 | |
|         if (special_interpolation)
 | |
|             mode = VKD3DSIM_NONE;
 | |
| 
 | |
|         var->regs[HLSL_REGSET_NUMERIC] = allocate_register(ctx, allocator, 1, UINT_MAX,
 | |
|                 reg_size, component_count, mode, var->force_align, vip_allocation);
 | |
| 
 | |
|         TRACE("Allocated %s to %s (mode %d).\n", var->name, debug_register(output ? 'o' : 'v',
 | |
|                 var->regs[HLSL_REGSET_NUMERIC], var->data_type), mode);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void allocate_semantic_registers(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
 | |
|         uint32_t *output_reg_count)
 | |
| {
 | |
|     struct register_allocator in_prim_allocator = {0}, patch_constant_out_patch_allocator = {0};
 | |
|     struct register_allocator input_allocator = {0}, output_allocator = {0};
 | |
|     bool is_vertex_shader = ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX;
 | |
|     bool is_pixel_shader = ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     in_prim_allocator.prioritize_smaller_writemasks = true;
 | |
|     patch_constant_out_patch_allocator.prioritize_smaller_writemasks = true;
 | |
|     input_allocator.prioritize_smaller_writemasks = true;
 | |
|     output_allocator.prioritize_smaller_writemasks = true;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->is_input_semantic)
 | |
|         {
 | |
|             if (hlsl_type_is_primitive_array(var->data_type))
 | |
|             {
 | |
|                 bool is_patch_constant_output_patch = ctx->is_patch_constant_func &&
 | |
|                         var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_OUTPUT;
 | |
| 
 | |
|                 if (is_patch_constant_output_patch)
 | |
|                     allocate_semantic_register(ctx, var, &patch_constant_out_patch_allocator, false,
 | |
|                             !is_vertex_shader);
 | |
|                 else
 | |
|                     allocate_semantic_register(ctx, var, &in_prim_allocator, false,
 | |
|                             !is_vertex_shader);
 | |
|             }
 | |
|             else
 | |
|                 allocate_semantic_register(ctx, var, &input_allocator, false, !is_vertex_shader);
 | |
|         }
 | |
| 
 | |
|         if (var->is_output_semantic)
 | |
|             allocate_semantic_register(ctx, var, &output_allocator, true, !is_pixel_shader);
 | |
|     }
 | |
| 
 | |
|     *output_reg_count = output_allocator.reg_count;
 | |
| 
 | |
|     vkd3d_free(in_prim_allocator.allocations);
 | |
|     vkd3d_free(patch_constant_out_patch_allocator.allocations);
 | |
|     vkd3d_free(input_allocator.allocations);
 | |
|     vkd3d_free(output_allocator.allocations);
 | |
| }
 | |
| 
 | |
| static const struct hlsl_buffer *get_reserved_buffer(struct hlsl_ctx *ctx,
 | |
|         uint32_t space, uint32_t index, bool allocated_only)
 | |
| {
 | |
|     const struct hlsl_buffer *buffer;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, const struct hlsl_buffer, entry)
 | |
|     {
 | |
|         if (buffer->reservation.reg_type == 'b'
 | |
|                 && buffer->reservation.reg_space == space && buffer->reservation.reg_index == index)
 | |
|         {
 | |
|             if (allocated_only && !buffer->reg.allocated)
 | |
|                 continue;
 | |
| 
 | |
|             return buffer;
 | |
|         }
 | |
|     }
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static void hlsl_calculate_buffer_offset(struct hlsl_ctx *ctx, struct hlsl_ir_var *var, bool register_reservation)
 | |
| {
 | |
|     unsigned int var_reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
 | |
|     enum hlsl_type_class var_class = var->data_type->class;
 | |
|     struct hlsl_buffer *buffer = var->buffer;
 | |
| 
 | |
|     if (register_reservation)
 | |
|     {
 | |
|         var->buffer_offset = 4 * var->reg_reservation.reg_index;
 | |
|         var->has_explicit_bind_point = 1;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         if (var->reg_reservation.offset_type == 'c')
 | |
|         {
 | |
|             if (var->reg_reservation.offset_index % 4)
 | |
|             {
 | |
|                 if (var_class == HLSL_CLASS_MATRIX)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                             "packoffset() reservations with matrix types must be aligned with the beginning of a register.");
 | |
|                 }
 | |
|                 else if (var_class == HLSL_CLASS_ARRAY)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                             "packoffset() reservations with array types must be aligned with the beginning of a register.");
 | |
|                 }
 | |
|                 else if (var_class == HLSL_CLASS_STRUCT)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                             "packoffset() reservations with struct types must be aligned with the beginning of a register.");
 | |
|                 }
 | |
|                 else if (var_class == HLSL_CLASS_VECTOR)
 | |
|                 {
 | |
|                     unsigned int aligned_offset = hlsl_type_get_sm4_offset(var->data_type, var->reg_reservation.offset_index);
 | |
| 
 | |
|                     if (var->reg_reservation.offset_index != aligned_offset)
 | |
|                         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                                 "packoffset() reservations with vector types cannot span multiple registers.");
 | |
|                 }
 | |
|             }
 | |
|             var->buffer_offset = var->reg_reservation.offset_index;
 | |
|             var->has_explicit_bind_point = 1;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             var->buffer_offset = hlsl_type_get_sm4_offset(var->data_type, buffer->size);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     TRACE("Allocated buffer offset %u to %s.\n", var->buffer_offset, var->name);
 | |
|     buffer->size = max(buffer->size, var->buffer_offset + var_reg_size);
 | |
|     if (var->is_read)
 | |
|         buffer->used_size = max(buffer->used_size, var->buffer_offset + var_reg_size);
 | |
| }
 | |
| 
 | |
| static void validate_buffer_offsets(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct hlsl_ir_var *var1, *var2;
 | |
|     struct hlsl_buffer *buffer;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var1->is_uniform || hlsl_type_is_resource(var1->data_type))
 | |
|             continue;
 | |
| 
 | |
|         buffer = var1->buffer;
 | |
|         if (!buffer->used_size)
 | |
|             continue;
 | |
| 
 | |
|         LIST_FOR_EACH_ENTRY(var2, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|         {
 | |
|             unsigned int var1_reg_size, var2_reg_size;
 | |
| 
 | |
|             if (!var2->is_uniform || hlsl_type_is_resource(var2->data_type))
 | |
|                 continue;
 | |
| 
 | |
|             if (var1 == var2 || var1->buffer != var2->buffer)
 | |
|                 continue;
 | |
| 
 | |
|             /* This is to avoid reporting the error twice for the same pair of overlapping variables. */
 | |
|             if (strcmp(var1->name, var2->name) >= 0)
 | |
|                 continue;
 | |
| 
 | |
|             var1_reg_size = var1->data_type->reg_size[HLSL_REGSET_NUMERIC];
 | |
|             var2_reg_size = var2->data_type->reg_size[HLSL_REGSET_NUMERIC];
 | |
| 
 | |
|             if (var1->buffer_offset < var2->buffer_offset + var2_reg_size
 | |
|                     && var2->buffer_offset < var1->buffer_offset + var1_reg_size)
 | |
|                 hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                         "Invalid packoffset() reservation: Variables %s and %s overlap.",
 | |
|                         var1->name, var2->name);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var1, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         buffer = var1->buffer;
 | |
|         if (!buffer || buffer == ctx->globals_buffer)
 | |
|             continue;
 | |
| 
 | |
|         if (var1->reg_reservation.offset_type
 | |
|                 || var1->reg_reservation.reg_type == 's'
 | |
|                 || var1->reg_reservation.reg_type == 't'
 | |
|                 || var1->reg_reservation.reg_type == 'u')
 | |
|             buffer->manually_packed_elements = true;
 | |
|         else
 | |
|             buffer->automatically_packed_elements = true;
 | |
| 
 | |
|         if (buffer->manually_packed_elements && buffer->automatically_packed_elements)
 | |
|         {
 | |
|             hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                     "packoffset() must be specified for all the buffer elements, or none of them.");
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| void hlsl_calculate_buffer_offsets(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var->is_uniform || hlsl_type_is_resource(var->data_type))
 | |
|             continue;
 | |
| 
 | |
|         if (hlsl_var_has_buffer_offset_register_reservation(ctx, var))
 | |
|             hlsl_calculate_buffer_offset(ctx, var, true);
 | |
|     }
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var->is_uniform || hlsl_type_is_resource(var->data_type))
 | |
|             continue;
 | |
| 
 | |
|         if (!hlsl_var_has_buffer_offset_register_reservation(ctx, var))
 | |
|             hlsl_calculate_buffer_offset(ctx, var, false);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static unsigned int get_max_cbuffer_reg_index(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     if (hlsl_version_ge(ctx, 5, 1))
 | |
|         return UINT_MAX;
 | |
| 
 | |
|     return 13;
 | |
| }
 | |
| 
 | |
| static void allocate_buffers(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct hlsl_buffer *buffer;
 | |
|     uint32_t index = 0, id = 0;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var->is_uniform || hlsl_type_is_resource(var->data_type))
 | |
|             continue;
 | |
| 
 | |
|         if (var->is_param)
 | |
|             var->buffer = ctx->params_buffer;
 | |
|     }
 | |
| 
 | |
|     hlsl_calculate_buffer_offsets(ctx);
 | |
|     validate_buffer_offsets(ctx);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry)
 | |
|     {
 | |
|         if (!buffer->used_size)
 | |
|             continue;
 | |
| 
 | |
|         if (buffer->type == HLSL_BUFFER_CONSTANT)
 | |
|         {
 | |
|             const struct hlsl_reg_reservation *reservation = &buffer->reservation;
 | |
| 
 | |
|             if (reservation->reg_type == 'b')
 | |
|             {
 | |
|                 const struct hlsl_buffer *allocated_buffer = get_reserved_buffer(ctx,
 | |
|                         reservation->reg_space, reservation->reg_index, true);
 | |
|                 unsigned int max_index = get_max_cbuffer_reg_index(ctx);
 | |
| 
 | |
|                 if (buffer->reservation.reg_index > max_index)
 | |
|                     hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                             "Buffer reservation cb%u exceeds target's maximum (cb%u).",
 | |
|                             buffer->reservation.reg_index, max_index);
 | |
| 
 | |
|                 if (allocated_buffer && allocated_buffer != buffer)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS,
 | |
|                             "Multiple buffers bound to space %u, index %u.",
 | |
|                             reservation->reg_space, reservation->reg_index);
 | |
|                     hlsl_note(ctx, &allocated_buffer->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                             "Buffer %s is already bound to space %u, index %u.",
 | |
|                             allocated_buffer->name, reservation->reg_space, reservation->reg_index);
 | |
|                 }
 | |
| 
 | |
|                 buffer->reg.space = reservation->reg_space;
 | |
|                 buffer->reg.index = reservation->reg_index;
 | |
|                 if (hlsl_version_ge(ctx, 5, 1))
 | |
|                     buffer->reg.id = id++;
 | |
|                 else
 | |
|                     buffer->reg.id = buffer->reg.index;
 | |
|                 buffer->reg.allocation_size = 1;
 | |
|                 buffer->reg.allocated = true;
 | |
|                 TRACE("Allocated reserved %s to space %u, index %u, id %u.\n",
 | |
|                         buffer->name, buffer->reg.space, buffer->reg.index, buffer->reg.id);
 | |
|             }
 | |
|             else if (!reservation->reg_type)
 | |
|             {
 | |
|                 unsigned int max_index = get_max_cbuffer_reg_index(ctx);
 | |
|                 while (get_reserved_buffer(ctx, 0, index, false))
 | |
|                     ++index;
 | |
| 
 | |
|                 if (index > max_index)
 | |
|                     hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                         "Too many buffers reserved, target's maximum is %u.", max_index);
 | |
| 
 | |
|                 buffer->reg.space = 0;
 | |
|                 buffer->reg.index = index;
 | |
|                 if (hlsl_version_ge(ctx, 5, 1))
 | |
|                     buffer->reg.id = id++;
 | |
|                 else
 | |
|                     buffer->reg.id = buffer->reg.index;
 | |
|                 buffer->reg.allocation_size = 1;
 | |
|                 buffer->reg.allocated = true;
 | |
|                 TRACE("Allocated %s to space 0, index %u, id %u.\n", buffer->name, buffer->reg.index, buffer->reg.id);
 | |
|                 ++index;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 hlsl_error(ctx, &buffer->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_RESERVATION,
 | |
|                         "Constant buffers must be allocated to register type 'b'.");
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             FIXME("Allocate registers for texture buffers.\n");
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static const struct hlsl_ir_var *get_allocated_object(struct hlsl_ctx *ctx, enum hlsl_regset regset,
 | |
|         uint32_t space, uint32_t index, bool allocated_only)
 | |
| {
 | |
|     const struct hlsl_ir_var *var;
 | |
|     unsigned int start, count;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, const struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->reg_reservation.reg_type == get_regset_name(regset)
 | |
|                 && var->data_type->reg_size[regset])
 | |
|         {
 | |
|             /* Vars with a reservation prevent non-reserved vars from being
 | |
|              * bound there even if the reserved vars aren't used. */
 | |
|             start = var->reg_reservation.reg_index;
 | |
|             count = var->data_type->reg_size[regset];
 | |
| 
 | |
|             if (var->reg_reservation.reg_space != space)
 | |
|                 continue;
 | |
| 
 | |
|             if (!var->regs[regset].allocated && allocated_only)
 | |
|                 continue;
 | |
|         }
 | |
|         else if (var->regs[regset].allocated)
 | |
|         {
 | |
|             if (var->regs[regset].space != space)
 | |
|                 continue;
 | |
| 
 | |
|             start = var->regs[regset].index;
 | |
|             count = var->regs[regset].allocation_size;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             continue;
 | |
|         }
 | |
| 
 | |
|         if (start <= index && index < start + count)
 | |
|             return var;
 | |
|     }
 | |
|     return NULL;
 | |
| }
 | |
| 
 | |
| static void allocate_objects(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func, enum hlsl_regset regset)
 | |
| {
 | |
|     char regset_name = get_regset_name(regset);
 | |
|     uint32_t min_index = 0, id = 0;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     if (regset == HLSL_REGSET_UAVS && ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|     {
 | |
|         LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|         {
 | |
|             if (var->semantic.name && (!ascii_strcasecmp(var->semantic.name, "color")
 | |
|                     || !ascii_strcasecmp(var->semantic.name, "sv_target")))
 | |
|                 min_index = max(min_index, var->semantic.index + 1);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         unsigned int count = var->regs[regset].allocation_size;
 | |
| 
 | |
|         if (count == 0)
 | |
|             continue;
 | |
| 
 | |
|         /* The variable was already allocated if it has a reservation. */
 | |
|         if (var->regs[regset].allocated)
 | |
|         {
 | |
|             const struct hlsl_ir_var *reserved_object, *last_reported = NULL;
 | |
|             unsigned int i;
 | |
| 
 | |
|             if (var->regs[regset].index < min_index)
 | |
|             {
 | |
|                 VKD3D_ASSERT(regset == HLSL_REGSET_UAVS);
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS,
 | |
|                         "UAV index (%u) must be higher than the maximum render target index (%u).",
 | |
|                         var->regs[regset].index, min_index - 1);
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             for (i = 0; i < count; ++i)
 | |
|             {
 | |
|                 unsigned int space = var->regs[regset].space;
 | |
|                 unsigned int index = var->regs[regset].index + i;
 | |
| 
 | |
|                 /* get_allocated_object() may return "var" itself, but we
 | |
|                  * actually want that, otherwise we'll end up reporting the
 | |
|                  * same conflict between the same two variables twice. */
 | |
|                 reserved_object = get_allocated_object(ctx, regset, space, index, true);
 | |
|                 if (reserved_object && reserved_object != var && reserved_object != last_reported)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_OVERLAPPING_RESERVATIONS,
 | |
|                             "Multiple variables bound to space %u, %c%u.", regset_name, space, index);
 | |
|                     hlsl_note(ctx, &reserved_object->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                             "Variable '%s' is already bound to space %u, %c%u.",
 | |
|                             reserved_object->name, regset_name, space, index);
 | |
|                     last_reported = reserved_object;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             if (hlsl_version_ge(ctx, 5, 1))
 | |
|                 var->regs[regset].id = id++;
 | |
|             else
 | |
|                 var->regs[regset].id = var->regs[regset].index;
 | |
|             TRACE("Allocated reserved variable %s to space %u, indices %c%u-%c%u, id %u.\n",
 | |
|                     var->name, var->regs[regset].space, regset_name, var->regs[regset].index,
 | |
|                     regset_name, var->regs[regset].index + count, var->regs[regset].id);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             unsigned int index = min_index;
 | |
|             unsigned int available = 0;
 | |
| 
 | |
|             while (available < count)
 | |
|             {
 | |
|                 if (get_allocated_object(ctx, regset, 0, index, false))
 | |
|                     available = 0;
 | |
|                 else
 | |
|                     ++available;
 | |
|                 ++index;
 | |
|             }
 | |
|             index -= count;
 | |
| 
 | |
|             var->regs[regset].space = 0;
 | |
|             var->regs[regset].index = index;
 | |
|             if (hlsl_version_ge(ctx, 5, 1))
 | |
|                 var->regs[regset].id = id++;
 | |
|             else
 | |
|                 var->regs[regset].id = var->regs[regset].index;
 | |
|             var->regs[regset].allocated = true;
 | |
|             TRACE("Allocated variable %s to space 0, indices %c%u-%c%u, id %u.\n", var->name,
 | |
|                     regset_name, index, regset_name, index + count, var->regs[regset].id);
 | |
|             ++index;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void allocate_stream_outputs(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct hlsl_ir_var *var;
 | |
|     uint32_t index = 0;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var->data_type->reg_size[HLSL_REGSET_STREAM_OUTPUTS])
 | |
|             continue;
 | |
| 
 | |
|         /* We should have ensured that all stream output objects are single-element. */
 | |
|         VKD3D_ASSERT(var->data_type->reg_size[HLSL_REGSET_STREAM_OUTPUTS] == 1);
 | |
| 
 | |
|         var->regs[HLSL_REGSET_STREAM_OUTPUTS].space = 0;
 | |
|         var->regs[HLSL_REGSET_STREAM_OUTPUTS].index = index;
 | |
|         var->regs[HLSL_REGSET_STREAM_OUTPUTS].id = index;
 | |
|         var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated = true;
 | |
| 
 | |
|         ++index;
 | |
|     }
 | |
| }
 | |
| 
 | |
| bool hlsl_component_index_range_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
 | |
|         unsigned int *start, unsigned int *count)
 | |
| {
 | |
|     struct hlsl_type *type = deref->var->data_type;
 | |
|     unsigned int i;
 | |
| 
 | |
|     *start = 0;
 | |
|     *count = 0;
 | |
| 
 | |
|     for (i = 0; i < deref->path_len; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *path_node = deref->path[i].node;
 | |
|         unsigned int index;
 | |
| 
 | |
|         VKD3D_ASSERT(path_node);
 | |
|         if (path_node->type != HLSL_IR_CONSTANT)
 | |
|             return false;
 | |
| 
 | |
|         /* We should always have generated a cast to UINT. */
 | |
|         VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT);
 | |
| 
 | |
|         if (!component_index_from_deref_path_node(path_node, type, &index))
 | |
|             return false;
 | |
|         *start += index;
 | |
| 
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, path_node);
 | |
|     }
 | |
| 
 | |
|     *count = hlsl_type_component_count(type);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Retrieves true if the index is constant, and false otherwise. In the latter case, the maximum
 | |
|  * possible index is retrieved, assuming there is not out-of-bounds access. */
 | |
| bool hlsl_regset_index_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref,
 | |
|         enum hlsl_regset regset, unsigned int *index)
 | |
| {
 | |
|     struct hlsl_type *type = deref->var->data_type;
 | |
|     bool index_is_constant = true;
 | |
|     unsigned int i;
 | |
| 
 | |
|     *index = 0;
 | |
| 
 | |
|     for (i = 0; i < deref->path_len; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *path_node = deref->path[i].node;
 | |
|         unsigned int idx = 0;
 | |
| 
 | |
|         VKD3D_ASSERT(path_node);
 | |
|         if (path_node->type == HLSL_IR_CONSTANT)
 | |
|         {
 | |
|             /* We should always have generated a cast to UINT. */
 | |
|             VKD3D_ASSERT(hlsl_is_vec1(path_node->data_type) && path_node->data_type->e.numeric.type == HLSL_TYPE_UINT);
 | |
| 
 | |
|             idx = hlsl_ir_constant(path_node)->value.u[0].u;
 | |
| 
 | |
|             switch (type->class)
 | |
|             {
 | |
|                 case HLSL_CLASS_ARRAY:
 | |
|                     if (idx >= type->e.array.elements_count)
 | |
|                         return false;
 | |
| 
 | |
|                     *index += idx * type->e.array.type->reg_size[regset];
 | |
|                     break;
 | |
| 
 | |
|                 case HLSL_CLASS_STRUCT:
 | |
|                     *index += type->e.record.fields[idx].reg_offset[regset];
 | |
|                     break;
 | |
| 
 | |
|                 case HLSL_CLASS_MATRIX:
 | |
|                     *index += 4 * idx;
 | |
|                     break;
 | |
| 
 | |
|                 default:
 | |
|                     vkd3d_unreachable();
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             index_is_constant = false;
 | |
| 
 | |
|             switch (type->class)
 | |
|             {
 | |
|                 case HLSL_CLASS_ARRAY:
 | |
|                     idx = type->e.array.elements_count - 1;
 | |
|                     *index += idx * type->e.array.type->reg_size[regset];
 | |
|                     break;
 | |
| 
 | |
|                 case HLSL_CLASS_MATRIX:
 | |
|                     idx = hlsl_type_major_size(type) - 1;
 | |
|                     *index += idx * 4;
 | |
|                     break;
 | |
| 
 | |
|                 default:
 | |
|                     vkd3d_unreachable();
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         type = hlsl_get_element_type_from_path_index(ctx, type, path_node);
 | |
|     }
 | |
| 
 | |
|     VKD3D_ASSERT(!(regset <= HLSL_REGSET_LAST_OBJECT) || (type->reg_size[regset] == 1));
 | |
|     VKD3D_ASSERT(!(regset == HLSL_REGSET_NUMERIC) || type->reg_size[regset] <= 4);
 | |
|     return index_is_constant;
 | |
| }
 | |
| 
 | |
| bool hlsl_offset_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref, unsigned int *offset)
 | |
| {
 | |
|     enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
 | |
|     struct hlsl_ir_node *offset_node = deref->rel_offset.node;
 | |
|     unsigned int size;
 | |
| 
 | |
|     *offset = deref->const_offset;
 | |
| 
 | |
|     if (hlsl_type_is_primitive_array(deref->var->data_type))
 | |
|         return false;
 | |
| 
 | |
|     if (offset_node)
 | |
|     {
 | |
|         /* We should always have generated a cast to UINT. */
 | |
|         VKD3D_ASSERT(hlsl_is_vec1(offset_node->data_type) && offset_node->data_type->e.numeric.type == HLSL_TYPE_UINT);
 | |
|         VKD3D_ASSERT(offset_node->type != HLSL_IR_CONSTANT);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     size = deref->var->data_type->reg_size[regset];
 | |
|     if (*offset >= size)
 | |
|     {
 | |
|         /* FIXME: Report a more specific location for the constant deref. */
 | |
|         hlsl_error(ctx, &deref->var->loc, VKD3D_SHADER_ERROR_HLSL_OFFSET_OUT_OF_BOUNDS,
 | |
|                 "Dereference is out of bounds. %u/%u", *offset, size);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| unsigned int hlsl_offset_from_deref_safe(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
 | |
| {
 | |
|     unsigned int offset;
 | |
| 
 | |
|     if (hlsl_offset_from_deref(ctx, deref, &offset))
 | |
|         return offset;
 | |
| 
 | |
|     if (deref->rel_offset.node)
 | |
|         hlsl_fixme(ctx, &deref->rel_offset.node->loc, "Dereference with non-constant offset of type %s.",
 | |
|                 hlsl_node_type_to_string(deref->rel_offset.node->type));
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| struct hlsl_reg hlsl_reg_from_deref(struct hlsl_ctx *ctx, const struct hlsl_deref *deref)
 | |
| {
 | |
|     const struct hlsl_ir_var *var = deref->var;
 | |
|     struct hlsl_reg ret = var->regs[HLSL_REGSET_NUMERIC];
 | |
|     unsigned int offset = 0;
 | |
| 
 | |
|     VKD3D_ASSERT(deref->data_type);
 | |
|     VKD3D_ASSERT(hlsl_is_numeric_type(deref->data_type));
 | |
| 
 | |
|     if (!hlsl_type_is_primitive_array(deref->var->data_type))
 | |
|         offset = hlsl_offset_from_deref_safe(ctx, deref);
 | |
| 
 | |
|     ret.index += offset / 4;
 | |
|     ret.id += offset / 4;
 | |
| 
 | |
|     ret.writemask = 0xf & (0xf << (offset % 4));
 | |
|     if (var->regs[HLSL_REGSET_NUMERIC].writemask)
 | |
|         ret.writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, ret.writemask);
 | |
| 
 | |
|     return ret;
 | |
| }
 | |
| 
 | |
| static bool get_integral_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr,
 | |
|         unsigned int i, int *value)
 | |
| {
 | |
|     const struct hlsl_ir_node *instr = attr->args[i].node;
 | |
|     const struct hlsl_type *type = instr->data_type;
 | |
| 
 | |
|     if (type->class != HLSL_CLASS_SCALAR
 | |
|             || (type->e.numeric.type != HLSL_TYPE_INT && type->e.numeric.type != HLSL_TYPE_UINT))
 | |
|     {
 | |
|         struct vkd3d_string_buffer *string;
 | |
| 
 | |
|         if ((string = hlsl_type_to_string(ctx, type)))
 | |
|             hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                     "Unexpected type for argument %u of [%s]: expected int or uint, but got %s.",
 | |
|                     i, attr->name, string->buffer);
 | |
|         hlsl_release_string_buffer(ctx, string);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     if (instr->type != HLSL_IR_CONSTANT)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Non-constant expression in [%s] initializer.", attr->name);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     *value = hlsl_ir_constant(instr)->value.u[0].i;
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static const char *get_string_argument_value(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr, unsigned int i)
 | |
| {
 | |
|     const struct hlsl_ir_node *instr = attr->args[i].node;
 | |
|     const struct hlsl_type *type = instr->data_type;
 | |
| 
 | |
|     if (type->class != HLSL_CLASS_STRING)
 | |
|     {
 | |
|         struct vkd3d_string_buffer *string;
 | |
| 
 | |
|         if ((string = hlsl_type_to_string(ctx, type)))
 | |
|             hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                     "Wrong type for the argument %u of [%s]: expected string, but got %s.",
 | |
|                     i, attr->name, string->buffer);
 | |
|         hlsl_release_string_buffer(ctx, string);
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     return hlsl_ir_string_constant(instr)->string;
 | |
| }
 | |
| 
 | |
| static void parse_numthreads_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
 | |
| {
 | |
|     static const unsigned int limits[3] = {1024, 1024, 64};
 | |
|     unsigned int i;
 | |
| 
 | |
|     ctx->found_numthreads = 1;
 | |
| 
 | |
|     if (attr->args_count != 3)
 | |
|     {
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
 | |
|                 "Expected 3 parameters for [numthreads] attribute, but got %u.", attr->args_count);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < attr->args_count; ++i)
 | |
|     {
 | |
|         int value;
 | |
| 
 | |
|         if (!get_integral_argument_value(ctx, attr, i, &value))
 | |
|             return;
 | |
| 
 | |
|         if (value < 1 || value > limits[i])
 | |
|             hlsl_error(ctx, &attr->args[i].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT,
 | |
|                     "Dimension %u of the thread count must be between 1 and %u.", i, limits[i]);
 | |
| 
 | |
|         ctx->thread_count[i] = value;
 | |
|     }
 | |
| 
 | |
|     if (ctx->thread_count[0] * ctx->thread_count[1] * ctx->thread_count[2] > 1024)
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_THREAD_COUNT,
 | |
|                 "Product of thread count parameters cannot exceed 1024.");
 | |
| }
 | |
| 
 | |
| static void parse_domain_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
 | |
| {
 | |
|     const char *value;
 | |
| 
 | |
|     if (attr->args_count != 1)
 | |
|     {
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
 | |
|                 "Expected 1 parameter for [domain] attribute, but got %u.", attr->args_count);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!(value = get_string_argument_value(ctx, attr, 0)))
 | |
|         return;
 | |
| 
 | |
|     if (!strcmp(value, "isoline"))
 | |
|         ctx->domain = VKD3D_TESSELLATOR_DOMAIN_LINE;
 | |
|     else if (!strcmp(value, "tri"))
 | |
|         ctx->domain = VKD3D_TESSELLATOR_DOMAIN_TRIANGLE;
 | |
|     else if (!strcmp(value, "quad"))
 | |
|         ctx->domain = VKD3D_TESSELLATOR_DOMAIN_QUAD;
 | |
|     else
 | |
|         hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_DOMAIN,
 | |
|                 "Invalid tessellator domain \"%s\": expected \"isoline\", \"tri\", or \"quad\".",
 | |
|                 value);
 | |
| }
 | |
| 
 | |
| static void parse_outputcontrolpoints_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
 | |
| {
 | |
|     int value;
 | |
| 
 | |
|     if (attr->args_count != 1)
 | |
|     {
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
 | |
|                 "Expected 1 parameter for [outputcontrolpoints] attribute, but got %u.", attr->args_count);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!get_integral_argument_value(ctx, attr, 0, &value))
 | |
|         return;
 | |
| 
 | |
|     if (value < 0 || value > 32)
 | |
|         hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT,
 | |
|                 "Output control point count must be between 0 and 32.");
 | |
| 
 | |
|     ctx->output_control_point_count = value;
 | |
| }
 | |
| 
 | |
| static void parse_outputtopology_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
 | |
| {
 | |
|     const char *value;
 | |
| 
 | |
|     if (attr->args_count != 1)
 | |
|     {
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
 | |
|                 "Expected 1 parameter for [outputtopology] attribute, but got %u.", attr->args_count);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!(value = get_string_argument_value(ctx, attr, 0)))
 | |
|         return;
 | |
| 
 | |
|     if (!strcmp(value, "point"))
 | |
|         ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_POINT;
 | |
|     else if (!strcmp(value, "line"))
 | |
|         ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE;
 | |
|     else if (!strcmp(value, "triangle_cw"))
 | |
|         ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW;
 | |
|     else if (!strcmp(value, "triangle_ccw"))
 | |
|         ctx->output_primitive = VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW;
 | |
|     else
 | |
|         hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
 | |
|                 "Invalid tessellator output topology \"%s\": "
 | |
|                 "expected \"point\", \"line\", \"triangle_cw\", or \"triangle_ccw\".", value);
 | |
| }
 | |
| 
 | |
| static void parse_partitioning_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
 | |
| {
 | |
|     const char *value;
 | |
| 
 | |
|     if (attr->args_count != 1)
 | |
|     {
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
 | |
|                 "Expected 1 parameter for [partitioning] attribute, but got %u.", attr->args_count);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!(value = get_string_argument_value(ctx, attr, 0)))
 | |
|         return;
 | |
| 
 | |
|     if (!strcmp(value, "integer"))
 | |
|         ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_INTEGER;
 | |
|     else if (!strcmp(value, "pow2"))
 | |
|         ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_POW2;
 | |
|     else if (!strcmp(value, "fractional_even"))
 | |
|         ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN;
 | |
|     else if (!strcmp(value, "fractional_odd"))
 | |
|         ctx->partitioning = VKD3D_SHADER_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD;
 | |
|     else
 | |
|         hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_PARTITIONING,
 | |
|                 "Invalid tessellator partitioning \"%s\": "
 | |
|                 "expected \"integer\", \"pow2\", \"fractional_even\", or \"fractional_odd\".", value);
 | |
| }
 | |
| 
 | |
| static void parse_patchconstantfunc_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
 | |
| {
 | |
|     const char *name;
 | |
|     struct hlsl_ir_function *func;
 | |
|     struct hlsl_ir_function_decl *decl;
 | |
| 
 | |
|     if (attr->args_count != 1)
 | |
|     {
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
 | |
|                 "Expected 1 parameter for [patchconstantfunc] attribute, but got %u.", attr->args_count);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!(name = get_string_argument_value(ctx, attr, 0)))
 | |
|         return;
 | |
| 
 | |
|     ctx->patch_constant_func = NULL;
 | |
|     if ((func = hlsl_get_function(ctx, name)))
 | |
|     {
 | |
|         /* Pick the last overload with a body. */
 | |
|         LIST_FOR_EACH_ENTRY_REV(decl, &func->overloads, struct hlsl_ir_function_decl, entry)
 | |
|         {
 | |
|             if (decl->has_body)
 | |
|             {
 | |
|                 ctx->patch_constant_func = decl;
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (!ctx->patch_constant_func)
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_NOT_DEFINED,
 | |
|                 "Patch constant function \"%s\" is not defined.", name);
 | |
| }
 | |
| 
 | |
| static void parse_maxvertexcount_attribute(struct hlsl_ctx *ctx, const struct hlsl_attribute *attr)
 | |
| {
 | |
|     int value;
 | |
| 
 | |
|     if (attr->args_count != 1)
 | |
|     {
 | |
|         hlsl_error(ctx, &attr->loc, VKD3D_SHADER_ERROR_HLSL_WRONG_PARAMETER_COUNT,
 | |
|                 "Expected 1 parameter for [maxvertexcount] attribute, but got %u.", attr->args_count);
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (!get_integral_argument_value(ctx, attr, 0, &value))
 | |
|         return;
 | |
| 
 | |
|     if (value < 1 || value > 1024)
 | |
|         hlsl_error(ctx, &attr->args[0].node->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT,
 | |
|                 "Max vertex count must be between 1 and 1024.");
 | |
| 
 | |
|     ctx->max_vertex_count = value;
 | |
| }
 | |
| 
 | |
| static void parse_entry_function_attributes(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     const struct hlsl_profile_info *profile = ctx->profile;
 | |
|     unsigned int i;
 | |
| 
 | |
|     for (i = 0; i < entry_func->attr_count; ++i)
 | |
|     {
 | |
|         const struct hlsl_attribute *attr = entry_func->attrs[i];
 | |
| 
 | |
|         if (!strcmp(attr->name, "numthreads") && profile->type == VKD3D_SHADER_TYPE_COMPUTE)
 | |
|             parse_numthreads_attribute(ctx, attr);
 | |
|         else if (!strcmp(attr->name, "domain")
 | |
|                     && (profile->type == VKD3D_SHADER_TYPE_HULL || profile->type == VKD3D_SHADER_TYPE_DOMAIN))
 | |
|             parse_domain_attribute(ctx, attr);
 | |
|         else if (!strcmp(attr->name, "outputcontrolpoints") && profile->type == VKD3D_SHADER_TYPE_HULL)
 | |
|             parse_outputcontrolpoints_attribute(ctx, attr);
 | |
|         else if (!strcmp(attr->name, "outputtopology") && profile->type == VKD3D_SHADER_TYPE_HULL)
 | |
|             parse_outputtopology_attribute(ctx, attr);
 | |
|         else if (!strcmp(attr->name, "partitioning") && profile->type == VKD3D_SHADER_TYPE_HULL)
 | |
|             parse_partitioning_attribute(ctx, attr);
 | |
|         else if (!strcmp(attr->name, "patchconstantfunc") && profile->type == VKD3D_SHADER_TYPE_HULL)
 | |
|             parse_patchconstantfunc_attribute(ctx, attr);
 | |
|         else if (!strcmp(attr->name, "earlydepthstencil") && profile->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|             entry_func->early_depth_test = true;
 | |
|         else if (!strcmp(attr->name, "maxvertexcount") && profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|             parse_maxvertexcount_attribute(ctx, attr);
 | |
|         else
 | |
|             hlsl_warning(ctx, &entry_func->attrs[i]->loc, VKD3D_SHADER_WARNING_HLSL_UNKNOWN_ATTRIBUTE,
 | |
|                     "Ignoring unknown attribute \"%s\".", entry_func->attrs[i]->name);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void validate_hull_shader_attributes(struct hlsl_ctx *ctx, const struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     if (ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID)
 | |
|     {
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name);
 | |
|     }
 | |
| 
 | |
|     if (ctx->output_control_point_count == UINT_MAX)
 | |
|     {
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [outputcontrolpoints] attribute.", entry_func->func->name);
 | |
|     }
 | |
| 
 | |
|     if (!ctx->output_primitive)
 | |
|     {
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [outputtopology] attribute.", entry_func->func->name);
 | |
|     }
 | |
| 
 | |
|     if (!ctx->partitioning)
 | |
|     {
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [partitioning] attribute.", entry_func->func->name);
 | |
|     }
 | |
| 
 | |
|     if (!ctx->patch_constant_func)
 | |
|     {
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [patchconstantfunc] attribute.", entry_func->func->name);
 | |
|     }
 | |
|     else if (ctx->patch_constant_func == entry_func)
 | |
|     {
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_RECURSIVE_CALL,
 | |
|                 "Patch constant function cannot be the entry point function.");
 | |
|         /* Native returns E_NOTIMPL instead of E_FAIL here. */
 | |
|         ctx->result = VKD3D_ERROR_NOT_IMPLEMENTED;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     switch (ctx->domain)
 | |
|     {
 | |
|         case VKD3D_TESSELLATOR_DOMAIN_LINE:
 | |
|             if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CW
 | |
|                     || ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_TRIANGLE_CCW)
 | |
|                 hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
 | |
|                         "Triangle output topologies are not available for isoline domains.");
 | |
|             break;
 | |
| 
 | |
|         case VKD3D_TESSELLATOR_DOMAIN_TRIANGLE:
 | |
|             if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE)
 | |
|                 hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
 | |
|                         "Line output topologies are not available for triangle domains.");
 | |
|             break;
 | |
| 
 | |
|         case VKD3D_TESSELLATOR_DOMAIN_QUAD:
 | |
|             if (ctx->output_primitive == VKD3D_SHADER_TESSELLATOR_OUTPUT_LINE)
 | |
|                 hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_OUTPUT_PRIMITIVE,
 | |
|                         "Line output topologies are not available for quad domains.");
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static enum vkd3d_primitive_type get_primitive_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var)
 | |
| {
 | |
|     uint32_t prim_modifier = var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK;
 | |
|     enum vkd3d_primitive_type prim_type = VKD3D_PT_UNDEFINED;
 | |
| 
 | |
|     if (prim_modifier)
 | |
|     {
 | |
|         unsigned int count = var->data_type->e.array.elements_count;
 | |
|         unsigned int expected_count;
 | |
| 
 | |
|         VKD3D_ASSERT(!(prim_modifier & (prim_modifier - 1)));
 | |
| 
 | |
|         switch (prim_modifier)
 | |
|         {
 | |
|             case HLSL_PRIMITIVE_POINT:
 | |
|                 prim_type = VKD3D_PT_POINTLIST;
 | |
|                 expected_count = 1;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_PRIMITIVE_LINE:
 | |
|                 prim_type = VKD3D_PT_LINELIST;
 | |
|                 expected_count = 2;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_PRIMITIVE_TRIANGLE:
 | |
|                 prim_type = VKD3D_PT_TRIANGLELIST;
 | |
|                 expected_count = 3;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_PRIMITIVE_LINEADJ:
 | |
|                 prim_type = VKD3D_PT_LINELIST_ADJ;
 | |
|                 expected_count = 4;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_PRIMITIVE_TRIANGLEADJ:
 | |
|                 prim_type = VKD3D_PT_TRIANGLELIST_ADJ;
 | |
|                 expected_count = 6;
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 vkd3d_unreachable();
 | |
|         }
 | |
| 
 | |
|         if (count != expected_count)
 | |
|         {
 | |
|             struct vkd3d_string_buffer *string;
 | |
| 
 | |
|             if ((string = hlsl_modifiers_to_string(ctx, prim_modifier)))
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT,
 | |
|                         "Control point count %u does not match the expect count %u for the %s input primitive type.",
 | |
|                         count, expected_count, string->buffer);
 | |
|             hlsl_release_string_buffer(ctx, string);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* Patch types take precedence over primitive modifiers. */
 | |
|     if (hlsl_type_is_patch_array(var->data_type))
 | |
|         prim_type = VKD3D_PT_PATCH;
 | |
| 
 | |
|     VKD3D_ASSERT(prim_type != VKD3D_PT_UNDEFINED);
 | |
|     return prim_type;
 | |
| }
 | |
| 
 | |
| 
 | |
| static void validate_and_record_prim_type(struct hlsl_ctx *ctx, struct hlsl_ir_var *var)
 | |
| {
 | |
|     unsigned int control_point_count = var->data_type->e.array.elements_count;
 | |
|     enum hlsl_array_type array_type = var->data_type->e.array.array_type;
 | |
|     struct hlsl_type *control_point_type = var->data_type->e.array.type;
 | |
|     const struct hlsl_profile_info *profile = ctx->profile;
 | |
| 
 | |
|     if (array_type == HLSL_ARRAY_PATCH_INPUT)
 | |
|     {
 | |
|         if (profile->type != VKD3D_SHADER_TYPE_HULL
 | |
|                 && !(profile->type == VKD3D_SHADER_TYPE_GEOMETRY && hlsl_version_ge(ctx, 5, 0)))
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
 | |
|                     "InputPatch parameters can only be used in hull shaders, "
 | |
|                     "and geometry shaders with shader model 5.0 or higher.");
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
|     else if (array_type == HLSL_ARRAY_PATCH_OUTPUT)
 | |
|     {
 | |
|         if (!ctx->is_patch_constant_func && profile->type != VKD3D_SHADER_TYPE_DOMAIN)
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
 | |
|                     "OutputPatch parameters can only be used in "
 | |
|                     "hull shader patch constant functions and domain shaders.");
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if ((var->data_type->modifiers & HLSL_PRIMITIVE_MODIFIERS_MASK) && profile->type != VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|     {
 | |
|         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
 | |
|                 "Input primitive parameters can only be used in geometry shaders.");
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|     {
 | |
|         enum vkd3d_primitive_type prim_type = get_primitive_type(ctx, var);
 | |
| 
 | |
|         if (ctx->input_primitive_type == VKD3D_PT_UNDEFINED)
 | |
|         {
 | |
|             ctx->input_primitive_type = prim_type;
 | |
|         }
 | |
|         else if (ctx->input_primitive_type != prim_type)
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                     "Input primitive type does not match the previously declared type.");
 | |
|             hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                     "The input primitive was previously declared here.");
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (control_point_count > 32)
 | |
|     {
 | |
|         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT,
 | |
|                 "Control point count %u exceeds 32.", control_point_count);
 | |
|         return;
 | |
|     }
 | |
|     VKD3D_ASSERT(control_point_count > 0);
 | |
| 
 | |
|     if (ctx->is_patch_constant_func && array_type == HLSL_ARRAY_PATCH_OUTPUT)
 | |
|     {
 | |
|         if (control_point_count != ctx->output_control_point_count)
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT,
 | |
|                     "Output control point count %u does not match the count %u declared in the control point function.",
 | |
|                     control_point_count, ctx->output_control_point_count);
 | |
| 
 | |
|         if (!hlsl_types_are_equal(control_point_type, ctx->output_control_point_type))
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                     "Output control point type does not match the output type of the control point function.");
 | |
| 
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     if (ctx->input_control_point_count != UINT_MAX)
 | |
|     {
 | |
|         VKD3D_ASSERT(profile->type == VKD3D_SHADER_TYPE_GEOMETRY || ctx->is_patch_constant_func);
 | |
| 
 | |
|         if (control_point_count != ctx->input_control_point_count)
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT,
 | |
|                     "Input control point count %u does not match the count %u declared previously.",
 | |
|                     control_point_count, ctx->input_control_point_count);
 | |
|             hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                     "The input primitive was previously declared here.");
 | |
|         }
 | |
| 
 | |
|         if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY
 | |
|                 && !hlsl_types_are_equal(control_point_type, ctx->input_control_point_type))
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                     "Input control point type does not match the input type declared previously.");
 | |
|             hlsl_note(ctx, &ctx->input_primitive_param->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                     "The input primitive was previously declared here.");
 | |
|         }
 | |
| 
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     ctx->input_control_point_count = control_point_count;
 | |
|     ctx->input_control_point_type = control_point_type;
 | |
|     ctx->input_primitive_param = var;
 | |
| }
 | |
| 
 | |
| static void validate_and_record_stream_outputs(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     static const enum vkd3d_primitive_type prim_types[] =
 | |
|     {
 | |
|         [HLSL_STREAM_OUTPUT_POINT_STREAM]    = VKD3D_PT_POINTLIST,
 | |
|         [HLSL_STREAM_OUTPUT_LINE_STREAM]     = VKD3D_PT_LINESTRIP,
 | |
|         [HLSL_STREAM_OUTPUT_TRIANGLE_STREAM] = VKD3D_PT_TRIANGLESTRIP,
 | |
|     };
 | |
| 
 | |
|     bool reported_non_point_multistream = false, reported_nonzero_index = false, reported_invalid_index = false;
 | |
|     enum hlsl_so_object_type so_type;
 | |
|     const struct hlsl_type *type;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var->bind_count[HLSL_REGSET_STREAM_OUTPUTS])
 | |
|             continue;
 | |
| 
 | |
|         type = hlsl_get_stream_output_type(var->data_type);
 | |
|         so_type = type->e.so.so_type;
 | |
| 
 | |
|         VKD3D_ASSERT(so_type < ARRAY_SIZE(prim_types));
 | |
| 
 | |
|         if (ctx->output_topology_type == VKD3D_PT_UNDEFINED)
 | |
|         {
 | |
|             ctx->output_topology_type = prim_types[so_type];
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             if ((so_type != HLSL_STREAM_OUTPUT_POINT_STREAM || ctx->output_topology_type != VKD3D_PT_POINTLIST)
 | |
|                     && !reported_non_point_multistream)
 | |
|             {
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                         "Multiple output streams are only allowed with PointStream objects.");
 | |
|                 reported_non_point_multistream = true;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (var->regs[HLSL_REGSET_STREAM_OUTPUTS].index && hlsl_version_lt(ctx, 5, 0) && !reported_nonzero_index)
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
 | |
|                     "Multiple output streams are only supported in shader model 5.0 or higher.");
 | |
|             reported_nonzero_index = true;
 | |
|         }
 | |
| 
 | |
|         if (var->regs[HLSL_REGSET_STREAM_OUTPUTS].index >= VKD3D_MAX_STREAM_COUNT && !reported_invalid_index)
 | |
|         {
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SIZE,
 | |
|                     "Output stream index %u exceeds the maximum index %u.",
 | |
|                     var->regs[HLSL_REGSET_STREAM_OUTPUTS].index, VKD3D_MAX_STREAM_COUNT - 1);
 | |
|             reported_invalid_index = true;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* TODO: check that maxvertexcount * outputdatasize <= 1024. */
 | |
| }
 | |
| 
 | |
| static void validate_max_output_size(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
 | |
|         uint32_t output_reg_count)
 | |
| {
 | |
|     unsigned int max_output_size, comp_count = 0;
 | |
|     unsigned int *reg_comp_count;
 | |
|     struct hlsl_ir_var *var;
 | |
|     uint32_t id;
 | |
| 
 | |
|     if (ctx->result)
 | |
|         return;
 | |
| 
 | |
|     if (!(reg_comp_count = hlsl_calloc(ctx, output_reg_count, sizeof(*reg_comp_count))))
 | |
|         return;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var->is_output_semantic)
 | |
|             continue;
 | |
| 
 | |
|         VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
 | |
|         id = var->regs[HLSL_REGSET_NUMERIC].id;
 | |
|         reg_comp_count[id] = max(reg_comp_count[id], vkd3d_log2i(var->regs[HLSL_REGSET_NUMERIC].writemask) + 1);
 | |
|     }
 | |
| 
 | |
|     for (id = 0; id < output_reg_count; ++id)
 | |
|         comp_count += reg_comp_count[id];
 | |
| 
 | |
|     max_output_size = ctx->max_vertex_count * comp_count;
 | |
|     if (max_output_size > 1024)
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MAX_VERTEX_COUNT,
 | |
|                 "Max vertex count (%u) * output data component count (%u) = %u, which is greater than 1024.",
 | |
|                 ctx->max_vertex_count, comp_count, max_output_size);
 | |
| 
 | |
|     vkd3d_free(reg_comp_count);
 | |
| }
 | |
| 
 | |
| static void remove_unreachable_code(struct hlsl_ctx *ctx, struct hlsl_block *body)
 | |
| {
 | |
|     struct hlsl_ir_node *instr, *next;
 | |
|     struct hlsl_block block;
 | |
|     struct list *start;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(instr, next, &body->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->type == HLSL_IR_IF)
 | |
|         {
 | |
|             struct hlsl_ir_if *iff = hlsl_ir_if(instr);
 | |
| 
 | |
|             remove_unreachable_code(ctx, &iff->then_block);
 | |
|             remove_unreachable_code(ctx, &iff->else_block);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_LOOP)
 | |
|         {
 | |
|             struct hlsl_ir_loop *loop = hlsl_ir_loop(instr);
 | |
| 
 | |
|             remove_unreachable_code(ctx, &loop->body);
 | |
|         }
 | |
|         else if (instr->type == HLSL_IR_SWITCH)
 | |
|         {
 | |
|             struct hlsl_ir_switch *s = hlsl_ir_switch(instr);
 | |
|             struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|             LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|             {
 | |
|                 remove_unreachable_code(ctx, &c->body);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     /* Remove instructions past unconditional jumps. */
 | |
|     LIST_FOR_EACH_ENTRY(instr, &body->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         struct hlsl_ir_jump *jump;
 | |
| 
 | |
|         if (instr->type != HLSL_IR_JUMP)
 | |
|             continue;
 | |
| 
 | |
|         jump = hlsl_ir_jump(instr);
 | |
|         if (jump->type != HLSL_IR_JUMP_BREAK && jump->type != HLSL_IR_JUMP_CONTINUE)
 | |
|             continue;
 | |
| 
 | |
|         if (!(start = list_next(&body->instrs, &instr->entry)))
 | |
|             break;
 | |
| 
 | |
|         hlsl_block_init(&block);
 | |
|         list_move_slice_tail(&block.instrs, start, list_tail(&body->instrs));
 | |
|         hlsl_block_cleanup(&block);
 | |
| 
 | |
|         break;
 | |
|     }
 | |
| }
 | |
| 
 | |
| void hlsl_lower_index_loads(struct hlsl_ctx *ctx, struct hlsl_block *body)
 | |
| {
 | |
|     lower_ir(ctx, lower_index_loads, body);
 | |
| }
 | |
| 
 | |
| 
 | |
| static bool simplify_exprs(struct hlsl_ctx *ctx, struct hlsl_block *block)
 | |
| {
 | |
|     bool progress, any_progress = false;
 | |
| 
 | |
|     do
 | |
|     {
 | |
|         progress = hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, block, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, hlsl_normalize_binary_exprs, block, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_identities, block, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, hlsl_fold_constant_swizzles, block, NULL);
 | |
| 
 | |
|         any_progress |= progress;
 | |
|     } while (progress);
 | |
| 
 | |
|     return any_progress;
 | |
| }
 | |
| 
 | |
| static void hlsl_run_folding_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
 | |
| {
 | |
|     bool progress;
 | |
| 
 | |
|     hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL);
 | |
|     do
 | |
|     {
 | |
|         progress = simplify_exprs(ctx, body);
 | |
|         progress |= hlsl_copy_propagation_execute(ctx, body);
 | |
|         progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, body, NULL);
 | |
|     } while (progress);
 | |
|     hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL);
 | |
| }
 | |
| 
 | |
| void hlsl_run_const_passes(struct hlsl_ctx *ctx, struct hlsl_block *body)
 | |
| {
 | |
|     bool progress;
 | |
| 
 | |
|     lower_ir(ctx, lower_complex_casts, body);
 | |
|     lower_ir(ctx, lower_matrix_swizzles, body);
 | |
| 
 | |
|     lower_ir(ctx, lower_broadcasts, body);
 | |
|     while (hlsl_transform_ir(ctx, fold_redundant_casts, body, NULL));
 | |
|     do
 | |
|     {
 | |
|         progress = hlsl_transform_ir(ctx, split_array_copies, body, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, split_struct_copies, body, NULL);
 | |
|     }
 | |
|     while (progress);
 | |
|     hlsl_transform_ir(ctx, split_matrix_copies, body, NULL);
 | |
| 
 | |
|     lower_ir(ctx, lower_narrowing_casts, body);
 | |
|     lower_ir(ctx, lower_int_dot, body);
 | |
|     if (hlsl_version_ge(ctx, 4, 0))
 | |
|     {
 | |
|         lower_ir(ctx, lower_int_modulus_sm4, body);
 | |
|         lower_ir(ctx, lower_int_division_sm4, body);
 | |
|     }
 | |
|     lower_ir(ctx, lower_int_abs, body);
 | |
|     lower_ir(ctx, lower_casts_to_bool, body);
 | |
|     lower_ir(ctx, lower_float_modulus, body);
 | |
| 
 | |
|     hlsl_run_folding_passes(ctx, body);
 | |
| }
 | |
| 
 | |
| static void generate_vsir_signature_entry(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct shader_signature *signature, bool output, struct hlsl_ir_var *var)
 | |
| {
 | |
|     enum vkd3d_shader_component_type component_type = VKD3D_SHADER_COMPONENT_VOID;
 | |
|     bool is_primitive = hlsl_type_is_primitive_array(var->data_type);
 | |
|     enum vkd3d_shader_sysval_semantic sysval = VKD3D_SHADER_SV_NONE;
 | |
|     unsigned int register_index, mask, use_mask;
 | |
|     const char *name = var->semantic.name;
 | |
|     enum vkd3d_shader_register_type type;
 | |
|     struct signature_element *element;
 | |
| 
 | |
|     if (hlsl_version_ge(ctx, 4, 0))
 | |
|     {
 | |
|         struct vkd3d_string_buffer *string;
 | |
|         enum hlsl_base_type numeric_type;
 | |
|         bool has_idx, ret;
 | |
| 
 | |
|         ret = sm4_sysval_semantic_from_semantic_name(&sysval, &program->shader_version, ctx->semantic_compat_mapping,
 | |
|                 ctx->domain, var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive);
 | |
|         VKD3D_ASSERT(ret);
 | |
|         if (sysval == ~0u)
 | |
|             return;
 | |
| 
 | |
|         if (sm4_register_from_semantic_name(&program->shader_version, var->semantic.name, output, &type, &has_idx))
 | |
|         {
 | |
|             register_index = has_idx ? var->semantic.index : ~0u;
 | |
|             mask = (1u << var->data_type->e.numeric.dimx) - 1;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
 | |
|             register_index = var->regs[HLSL_REGSET_NUMERIC].id;
 | |
|             mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
 | |
|         }
 | |
| 
 | |
|         use_mask = mask; /* FIXME: retrieve use mask accurately. */
 | |
| 
 | |
|         if (var->data_type->class == HLSL_CLASS_ARRAY)
 | |
|             numeric_type = var->data_type->e.array.type->e.numeric.type;
 | |
|         else
 | |
|             numeric_type = var->data_type->e.numeric.type;
 | |
| 
 | |
|         switch (numeric_type)
 | |
|         {
 | |
|             case HLSL_TYPE_FLOAT:
 | |
|             case HLSL_TYPE_HALF:
 | |
|                 component_type = VKD3D_SHADER_COMPONENT_FLOAT;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_TYPE_INT:
 | |
|                 component_type = VKD3D_SHADER_COMPONENT_INT;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_TYPE_BOOL:
 | |
|             case HLSL_TYPE_MIN16UINT:
 | |
|             case HLSL_TYPE_UINT:
 | |
|                 component_type = VKD3D_SHADER_COMPONENT_UINT;
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_TYPE_DOUBLE:
 | |
|                 if ((string = hlsl_type_to_string(ctx, var->data_type)))
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                             "Invalid data type %s for semantic variable %s.", string->buffer, var->name);
 | |
|                 hlsl_release_string_buffer(ctx, string);
 | |
|                 break;
 | |
|         }
 | |
| 
 | |
|         if (sysval == VKD3D_SHADER_SV_TARGET && !ascii_strcasecmp(name, "color"))
 | |
|             name = "SV_Target";
 | |
|         else if (sysval == VKD3D_SHADER_SV_DEPTH && !ascii_strcasecmp(name, "depth"))
 | |
|             name ="SV_Depth";
 | |
|         else if (sysval == VKD3D_SHADER_SV_POSITION && !ascii_strcasecmp(name, "position"))
 | |
|             name = "SV_Position";
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         if ((!output && !var->last_read) || (output && !var->first_write))
 | |
|             return;
 | |
| 
 | |
|         if (!sm1_register_from_semantic_name(&program->shader_version,
 | |
|                 var->semantic.name, var->semantic.index, output, &sysval, &type, ®ister_index))
 | |
|         {
 | |
|             enum vkd3d_decl_usage usage;
 | |
|             unsigned int usage_idx;
 | |
|             bool ret;
 | |
| 
 | |
|             register_index = var->regs[HLSL_REGSET_NUMERIC].id;
 | |
| 
 | |
|             ret = sm1_usage_from_semantic_name(var->semantic.name, var->semantic.index, &usage, &usage_idx);
 | |
|             VKD3D_ASSERT(ret);
 | |
|             /* With the exception of vertex POSITION output, none of these are
 | |
|              * system values. Pixel POSITION input is not equivalent to
 | |
|              * SV_Position; the closer equivalent is VPOS, which is not declared
 | |
|              * as a semantic. */
 | |
|             if (program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX
 | |
|                     && output && usage == VKD3D_DECL_USAGE_POSITION)
 | |
|                 sysval = VKD3D_SHADER_SV_POSITION;
 | |
|             else
 | |
|                 sysval = VKD3D_SHADER_SV_NONE;
 | |
|         }
 | |
| 
 | |
|         mask = (1 << var->data_type->e.numeric.dimx) - 1;
 | |
| 
 | |
|         if (!ascii_strcasecmp(var->semantic.name, "PSIZE") && output
 | |
|                 && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX)
 | |
|         {
 | |
|             if (var->data_type->e.numeric.dimx > 1)
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
 | |
|                         "PSIZE output must have only 1 component in this shader model.");
 | |
|             /* For some reason the writemask has all components set. */
 | |
|             mask = VKD3DSP_WRITEMASK_ALL;
 | |
|         }
 | |
|         if (!ascii_strcasecmp(var->semantic.name, "FOG") && output && program->shader_version.major < 3
 | |
|                 && program->shader_version.type == VKD3D_SHADER_TYPE_VERTEX && var->data_type->e.numeric.dimx > 1)
 | |
|             hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_SEMANTIC,
 | |
|                     "FOG output must have only 1 component in this shader model.");
 | |
| 
 | |
|         use_mask = mask; /* FIXME: retrieve use mask accurately. */
 | |
|         component_type = VKD3D_SHADER_COMPONENT_FLOAT;
 | |
|     }
 | |
| 
 | |
|     if (!vkd3d_array_reserve((void **)&signature->elements, &signature->elements_capacity,
 | |
|             signature->element_count + 1, sizeof(*signature->elements)))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return;
 | |
|     }
 | |
|     element = &signature->elements[signature->element_count++];
 | |
|     memset(element, 0, sizeof(*element));
 | |
| 
 | |
|     if (!(element->semantic_name = vkd3d_strdup(name)))
 | |
|     {
 | |
|         --signature->element_count;
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return;
 | |
|     }
 | |
|     element->semantic_index = var->semantic.index;
 | |
|     element->sysval_semantic = sysval;
 | |
|     element->component_type = component_type;
 | |
|     element->register_index = register_index;
 | |
|     element->target_location = register_index;
 | |
|     element->register_count = 1;
 | |
|     element->mask = mask;
 | |
|     element->used_mask = use_mask;
 | |
|     if (program->shader_version.type == VKD3D_SHADER_TYPE_PIXEL && !output)
 | |
|     {
 | |
|         if (program->shader_version.major >= 4)
 | |
|             element->interpolation_mode = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
 | |
|         else
 | |
|             element->interpolation_mode = VKD3DSIM_LINEAR;
 | |
|     }
 | |
| 
 | |
|     switch (var->data_type->e.numeric.type)
 | |
|     {
 | |
|         case HLSL_TYPE_BOOL:
 | |
|         case HLSL_TYPE_DOUBLE:
 | |
|         case HLSL_TYPE_FLOAT:
 | |
|         case HLSL_TYPE_HALF:
 | |
|         case HLSL_TYPE_INT:
 | |
|         case HLSL_TYPE_UINT:
 | |
|             element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_NONE;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_MIN16UINT:
 | |
|             element->min_precision = VKD3D_SHADER_MINIMUM_PRECISION_UINT_16;
 | |
|             break;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void generate_vsir_signature(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_function_decl *func)
 | |
| {
 | |
|     bool is_domain = program->shader_version.type == VKD3D_SHADER_TYPE_DOMAIN;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     ctx->is_patch_constant_func = func == ctx->patch_constant_func;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->is_input_semantic)
 | |
|         {
 | |
|             bool is_patch = hlsl_type_is_patch_array(var->data_type);
 | |
| 
 | |
|             if (ctx->is_patch_constant_func)
 | |
|             {
 | |
|                 if (!is_patch)
 | |
|                     generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, var);
 | |
|             }
 | |
|             else if (is_domain)
 | |
|             {
 | |
|                 if (is_patch)
 | |
|                     generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var);
 | |
|                 else
 | |
|                     generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, false, var);
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 generate_vsir_signature_entry(ctx, program, &program->input_signature, false, var);
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (var->is_output_semantic)
 | |
|         {
 | |
|             if (ctx->is_patch_constant_func)
 | |
|                 generate_vsir_signature_entry(ctx, program, &program->patch_constant_signature, true, var);
 | |
|             else
 | |
|                 generate_vsir_signature_entry(ctx, program, &program->output_signature, true, var);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static enum vkd3d_data_type vsir_data_type_from_hlsl_type(struct hlsl_ctx *ctx, const struct hlsl_type *type)
 | |
| {
 | |
|     if (hlsl_version_lt(ctx, 4, 0))
 | |
|         return VKD3D_DATA_FLOAT;
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_ARRAY)
 | |
|         return vsir_data_type_from_hlsl_type(ctx, type->e.array.type);
 | |
|     if (type->class == HLSL_CLASS_STRUCT)
 | |
|         return VKD3D_DATA_MIXED;
 | |
|     if (type->class <= HLSL_CLASS_LAST_NUMERIC)
 | |
|     {
 | |
|         switch (type->e.numeric.type)
 | |
|         {
 | |
|             case HLSL_TYPE_DOUBLE:
 | |
|                 return VKD3D_DATA_DOUBLE;
 | |
|             case HLSL_TYPE_FLOAT:
 | |
|                 return VKD3D_DATA_FLOAT;
 | |
|             case HLSL_TYPE_HALF:
 | |
|                 return VKD3D_DATA_HALF;
 | |
|             case HLSL_TYPE_INT:
 | |
|                 return VKD3D_DATA_INT;
 | |
|             case HLSL_TYPE_UINT:
 | |
|             case HLSL_TYPE_BOOL:
 | |
|             case HLSL_TYPE_MIN16UINT:
 | |
|                 return VKD3D_DATA_UINT;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static enum vkd3d_data_type vsir_data_type_from_hlsl_instruction(struct hlsl_ctx *ctx,
 | |
|         const struct hlsl_ir_node *instr)
 | |
| {
 | |
|     return vsir_data_type_from_hlsl_type(ctx, instr->data_type);
 | |
| }
 | |
| 
 | |
| static uint32_t generate_vsir_get_src_swizzle(uint32_t src_writemask, uint32_t dst_writemask)
 | |
| {
 | |
|     uint32_t swizzle;
 | |
| 
 | |
|     swizzle = hlsl_swizzle_from_writemask(src_writemask);
 | |
|     swizzle = hlsl_map_swizzle(swizzle, dst_writemask);
 | |
|     return swizzle;
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_constant_defs(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct hlsl_block *block)
 | |
| {
 | |
|     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     unsigned int i, x;
 | |
| 
 | |
|     for (i = 0; i < ctx->constant_defs.count; ++i)
 | |
|     {
 | |
|         const struct hlsl_constant_register *constant_reg = &ctx->constant_defs.regs[i];
 | |
| 
 | |
|         if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
 | |
|         {
 | |
|             ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         ins = &instructions->elements[instructions->count];
 | |
|         if (!vsir_instruction_init_with_params(program, ins, &constant_reg->loc, VKD3DSIH_DEF, 1, 1))
 | |
|         {
 | |
|             ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|             return;
 | |
|         }
 | |
|         ++instructions->count;
 | |
| 
 | |
|         dst_param = &ins->dst[0];
 | |
|         vsir_register_init(&dst_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
 | |
|         ins->dst[0].reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|         ins->dst[0].reg.idx[0].offset = constant_reg->index;
 | |
|         ins->dst[0].write_mask = VKD3DSP_WRITEMASK_ALL;
 | |
| 
 | |
|         src_param = &ins->src[0];
 | |
|         vsir_register_init(&src_param->reg, VKD3DSPR_IMMCONST, VKD3D_DATA_FLOAT, 0);
 | |
|         src_param->reg.type = VKD3DSPR_IMMCONST;
 | |
|         src_param->reg.precision = VKD3D_SHADER_REGISTER_PRECISION_DEFAULT;
 | |
|         src_param->reg.non_uniform = false;
 | |
|         src_param->reg.data_type = VKD3D_DATA_FLOAT;
 | |
|         src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|         for (x = 0; x < 4; ++x)
 | |
|             src_param->reg.u.immconst_f32[x] = constant_reg->value.f[x];
 | |
|         src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_sampler_dcls(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_block *block)
 | |
| {
 | |
|     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
 | |
|     enum vkd3d_shader_resource_type resource_type;
 | |
|     struct vkd3d_shader_register_range *range;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_semantic *semantic;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     enum hlsl_sampler_dim sampler_dim;
 | |
|     struct hlsl_ir_var *var;
 | |
|     unsigned int i, count;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (!var->regs[HLSL_REGSET_SAMPLERS].allocated)
 | |
|             continue;
 | |
| 
 | |
|         count = var->bind_count[HLSL_REGSET_SAMPLERS];
 | |
|         for (i = 0; i < count; ++i)
 | |
|         {
 | |
|             if (var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
 | |
|             {
 | |
|                 sampler_dim = var->objects_usage[HLSL_REGSET_SAMPLERS][i].sampler_dim;
 | |
| 
 | |
|                 switch (sampler_dim)
 | |
|                 {
 | |
|                     case HLSL_SAMPLER_DIM_2D:
 | |
|                         resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_2D;
 | |
|                         break;
 | |
| 
 | |
|                     case HLSL_SAMPLER_DIM_CUBE:
 | |
|                         resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_CUBE;
 | |
|                         break;
 | |
| 
 | |
|                     case HLSL_SAMPLER_DIM_3D:
 | |
|                         resource_type = VKD3D_SHADER_RESOURCE_TEXTURE_3D;
 | |
|                         break;
 | |
| 
 | |
|                     case HLSL_SAMPLER_DIM_GENERIC:
 | |
|                         /* These can appear in sm4-style separate sample
 | |
|                          * instructions that haven't been lowered. */
 | |
|                         hlsl_fixme(ctx, &var->loc, "Generic samplers need to be lowered.");
 | |
|                         continue;
 | |
| 
 | |
|                     default:
 | |
|                         vkd3d_unreachable();
 | |
|                         break;
 | |
|                 }
 | |
| 
 | |
|                 if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
 | |
|                 {
 | |
|                     ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|                     return;
 | |
|                 }
 | |
| 
 | |
|                 ins = &instructions->elements[instructions->count];
 | |
|                 if (!vsir_instruction_init_with_params(program, ins, &var->loc, VKD3DSIH_DCL, 0, 0))
 | |
|                 {
 | |
|                     ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|                     return;
 | |
|                 }
 | |
|                 ++instructions->count;
 | |
| 
 | |
|                 semantic = &ins->declaration.semantic;
 | |
|                 semantic->resource_type = resource_type;
 | |
| 
 | |
|                 dst_param = &semantic->resource.reg;
 | |
|                 vsir_register_init(&dst_param->reg, VKD3DSPR_SAMPLER, VKD3D_DATA_FLOAT, 1);
 | |
|                 dst_param->reg.dimension = VSIR_DIMENSION_NONE;
 | |
|                 dst_param->reg.idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index + i;
 | |
|                 dst_param->write_mask = 0;
 | |
|                 range = &semantic->resource.range;
 | |
|                 range->space = 0;
 | |
|                 range->first = range->last = dst_param->reg.idx[0].offset;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static enum vkd3d_shader_register_type sm4_get_semantic_register_type(enum vkd3d_shader_type shader_type,
 | |
|         bool is_patch_constant_func, const struct hlsl_ir_var *var)
 | |
| {
 | |
|     if (hlsl_type_is_primitive_array(var->data_type))
 | |
|     {
 | |
|         VKD3D_ASSERT(var->is_input_semantic);
 | |
| 
 | |
|         switch (shader_type)
 | |
|         {
 | |
|             case VKD3D_SHADER_TYPE_HULL:
 | |
|                 if (is_patch_constant_func)
 | |
|                 {
 | |
|                     bool is_inputpatch = var->data_type->e.array.array_type == HLSL_ARRAY_PATCH_INPUT;
 | |
| 
 | |
|                     return is_inputpatch ? VKD3DSPR_INCONTROLPOINT : VKD3DSPR_OUTCONTROLPOINT;
 | |
|                 }
 | |
|                 return VKD3DSPR_INPUT;
 | |
| 
 | |
|             case VKD3D_SHADER_TYPE_DOMAIN:
 | |
|                 return VKD3DSPR_INCONTROLPOINT;
 | |
| 
 | |
|             default:
 | |
|                 return VKD3DSPR_INPUT;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (var->is_output_semantic)
 | |
|         return VKD3DSPR_OUTPUT;
 | |
|     if (shader_type == VKD3D_SHADER_TYPE_DOMAIN)
 | |
|         return VKD3DSPR_PATCHCONST;
 | |
|     return VKD3DSPR_INPUT;
 | |
| }
 | |
| 
 | |
| static struct vkd3d_shader_instruction *generate_vsir_add_program_instruction(
 | |
|         struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         const struct vkd3d_shader_location *loc, enum vkd3d_shader_opcode opcode,
 | |
|         unsigned int dst_count, unsigned int src_count)
 | |
| {
 | |
|     struct vkd3d_shader_instruction_array *instructions = &program->instructions;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!shader_instruction_array_reserve(instructions, instructions->count + 1))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return NULL;
 | |
|     }
 | |
|     ins = &instructions->elements[instructions->count];
 | |
|     if (!vsir_instruction_init_with_params(program, ins, loc, opcode, dst_count, src_count))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return NULL;
 | |
|     }
 | |
|     ++instructions->count;
 | |
|     return ins;
 | |
| }
 | |
| 
 | |
| static void vsir_src_from_hlsl_constant_value(struct vkd3d_shader_src_param *src,
 | |
|         struct hlsl_ctx *ctx, const struct hlsl_constant_value *value,
 | |
|         enum vkd3d_data_type type, unsigned int width, unsigned int map_writemask)
 | |
| {
 | |
|     unsigned int i, j;
 | |
| 
 | |
|     vsir_src_param_init(src, VKD3DSPR_IMMCONST, type, 0);
 | |
|     if (width == 1)
 | |
|     {
 | |
|         src->reg.u.immconst_u32[0] = value->u[0].u;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     src->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     for (i = 0, j = 0; i < 4; ++i)
 | |
|     {
 | |
|         if ((map_writemask & (1u << i)) && (j < width))
 | |
|             src->reg.u.immconst_u32[i] = value->u[j++].u;
 | |
|         else
 | |
|             src->reg.u.immconst_u32[i] = 0;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void vsir_src_from_hlsl_node(struct vkd3d_shader_src_param *src,
 | |
|         struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr, uint32_t map_writemask)
 | |
| {
 | |
|     struct hlsl_ir_constant *constant;
 | |
| 
 | |
|     if (hlsl_version_ge(ctx, 4, 0) && instr->type == HLSL_IR_CONSTANT)
 | |
|     {
 | |
|         /* In SM4 constants are inlined */
 | |
|         constant = hlsl_ir_constant(instr);
 | |
|         vsir_src_from_hlsl_constant_value(src, ctx, &constant->value,
 | |
|                 vsir_data_type_from_hlsl_instruction(ctx, instr), instr->data_type->e.numeric.dimx, map_writemask);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         vsir_register_init(&src->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
 | |
|         src->reg.idx[0].offset = instr->reg.id;
 | |
|         src->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|         src->swizzle = generate_vsir_get_src_swizzle(instr->reg.writemask, map_writemask);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static struct vkd3d_shader_src_param *sm4_generate_vsir_new_idx_src(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_node *rel_offset)
 | |
| {
 | |
|     struct vkd3d_shader_src_param *idx_src;
 | |
| 
 | |
|     if (!(idx_src = vsir_program_get_src_params(program, 1)))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     memset(idx_src, 0, sizeof(*idx_src));
 | |
|     vsir_src_from_hlsl_node(idx_src, ctx, rel_offset, VKD3DSP_WRITEMASK_ALL);
 | |
|     return idx_src;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_numeric_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
 | |
| {
 | |
|     const struct hlsl_ir_var *var = deref->var;
 | |
|     unsigned int offset_const_deref;
 | |
| 
 | |
|     reg->type = var->indexable ? VKD3DSPR_IDXTEMP : VKD3DSPR_TEMP;
 | |
|     reg->idx[0].offset = var->regs[HLSL_REGSET_NUMERIC].id;
 | |
|     reg->dimension = VSIR_DIMENSION_VEC4;
 | |
| 
 | |
|     VKD3D_ASSERT(var->regs[HLSL_REGSET_NUMERIC].allocated);
 | |
| 
 | |
|     if (!var->indexable)
 | |
|     {
 | |
|         offset_const_deref = hlsl_offset_from_deref_safe(ctx, deref);
 | |
|         reg->idx[0].offset += offset_const_deref / 4;
 | |
|         reg->idx_count = 1;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         offset_const_deref = deref->const_offset;
 | |
|         reg->idx[1].offset = offset_const_deref / 4;
 | |
|         reg->idx_count = 2;
 | |
| 
 | |
|         if (deref->rel_offset.node)
 | |
|         {
 | |
|             if (!(reg->idx[1].rel_addr = sm4_generate_vsir_new_idx_src(ctx, program, deref->rel_offset.node)))
 | |
|                 return false;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     *writemask = 0xf & (0xf << (offset_const_deref % 4));
 | |
|     if (var->regs[HLSL_REGSET_NUMERIC].writemask)
 | |
|         *writemask = hlsl_combine_writemasks(var->regs[HLSL_REGSET_NUMERIC].writemask, *writemask);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_reg_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct vkd3d_shader_register *reg, uint32_t *writemask, const struct hlsl_deref *deref)
 | |
| {
 | |
|     const struct vkd3d_shader_version *version = &program->shader_version;
 | |
|     const struct hlsl_type *data_type = hlsl_deref_get_type(ctx, deref);
 | |
|     const struct hlsl_ir_var *var = deref->var;
 | |
| 
 | |
|     if (var->is_uniform)
 | |
|     {
 | |
|         enum hlsl_regset regset = hlsl_deref_get_regset(ctx, deref);
 | |
| 
 | |
|         if (regset == HLSL_REGSET_TEXTURES)
 | |
|         {
 | |
|             reg->type = VKD3DSPR_RESOURCE;
 | |
|             reg->dimension = VSIR_DIMENSION_VEC4;
 | |
|             if (vkd3d_shader_ver_ge(version, 5, 1))
 | |
|             {
 | |
|                 reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].id;
 | |
|                 reg->idx[1].offset = var->regs[HLSL_REGSET_TEXTURES].index; /* FIXME: array index */
 | |
|                 reg->idx_count = 2;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 reg->idx[0].offset = var->regs[HLSL_REGSET_TEXTURES].index;
 | |
|                 reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
 | |
|                 reg->idx_count = 1;
 | |
|             }
 | |
|             VKD3D_ASSERT(regset == HLSL_REGSET_TEXTURES);
 | |
|             *writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|         }
 | |
|         else if (regset == HLSL_REGSET_UAVS)
 | |
|         {
 | |
|             reg->type = VKD3DSPR_UAV;
 | |
|             reg->dimension = VSIR_DIMENSION_VEC4;
 | |
|             if (vkd3d_shader_ver_ge(version, 5, 1))
 | |
|             {
 | |
|                 reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].id;
 | |
|                 reg->idx[1].offset = var->regs[HLSL_REGSET_UAVS].index; /* FIXME: array index */
 | |
|                 reg->idx_count = 2;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 reg->idx[0].offset = var->regs[HLSL_REGSET_UAVS].index;
 | |
|                 reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
 | |
|                 reg->idx_count = 1;
 | |
|             }
 | |
|             VKD3D_ASSERT(regset == HLSL_REGSET_UAVS);
 | |
|             *writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|         }
 | |
|         else if (regset == HLSL_REGSET_SAMPLERS)
 | |
|         {
 | |
|             reg->type = VKD3DSPR_SAMPLER;
 | |
|             reg->dimension = VSIR_DIMENSION_NONE;
 | |
|             if (vkd3d_shader_ver_ge(version, 5, 1))
 | |
|             {
 | |
|                 reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].id;
 | |
|                 reg->idx[1].offset = var->regs[HLSL_REGSET_SAMPLERS].index; /* FIXME: array index */
 | |
|                 reg->idx_count = 2;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 reg->idx[0].offset = var->regs[HLSL_REGSET_SAMPLERS].index;
 | |
|                 reg->idx[0].offset += hlsl_offset_from_deref_safe(ctx, deref);
 | |
|                 reg->idx_count = 1;
 | |
|             }
 | |
|             VKD3D_ASSERT(regset == HLSL_REGSET_SAMPLERS);
 | |
|             *writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|         }
 | |
|         else if (regset == HLSL_REGSET_STREAM_OUTPUTS)
 | |
|         {
 | |
|             reg->type = VKD3DSPR_STREAM;
 | |
|             reg->dimension = VSIR_DIMENSION_NONE;
 | |
|             reg->idx[0].offset = var->regs[HLSL_REGSET_STREAM_OUTPUTS].index;
 | |
|             reg->idx_count = 1;
 | |
|             *writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             unsigned int offset = deref->const_offset + var->buffer_offset;
 | |
| 
 | |
|             VKD3D_ASSERT(data_type->class <= HLSL_CLASS_VECTOR);
 | |
|             reg->type = VKD3DSPR_CONSTBUFFER;
 | |
|             reg->dimension = VSIR_DIMENSION_VEC4;
 | |
|             if (vkd3d_shader_ver_ge(version, 5, 1))
 | |
|             {
 | |
|                 reg->idx[0].offset = var->buffer->reg.id;
 | |
|                 reg->idx[1].offset = var->buffer->reg.index; /* FIXME: array index */
 | |
|                 reg->idx[2].offset = offset / 4;
 | |
|                 reg->idx_count = 3;
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 reg->idx[0].offset = var->buffer->reg.index;
 | |
|                 reg->idx[1].offset = offset / 4;
 | |
|                 reg->idx_count = 2;
 | |
|             }
 | |
| 
 | |
|             if (deref->rel_offset.node)
 | |
|             {
 | |
|                 if (!(reg->idx[reg->idx_count - 1].rel_addr = sm4_generate_vsir_new_idx_src(ctx,
 | |
|                         program, deref->rel_offset.node)))
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|             *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset & 3);
 | |
|         }
 | |
|     }
 | |
|     else if (var->is_input_semantic)
 | |
|     {
 | |
|         bool is_primitive = hlsl_type_is_primitive_array(var->data_type);
 | |
|         bool has_idx;
 | |
| 
 | |
|         if (sm4_register_from_semantic_name(version, var->semantic.name, false, ®->type, &has_idx))
 | |
|         {
 | |
|             unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
 | |
| 
 | |
|             VKD3D_ASSERT(!is_primitive);
 | |
| 
 | |
|             if (has_idx)
 | |
|             {
 | |
|                 reg->idx[0].offset = var->semantic.index + offset / 4;
 | |
|                 reg->idx_count = 1;
 | |
|             }
 | |
| 
 | |
|             if (shader_sm4_is_scalar_register(reg))
 | |
|                 reg->dimension = VSIR_DIMENSION_SCALAR;
 | |
|             else
 | |
|                 reg->dimension = VSIR_DIMENSION_VEC4;
 | |
|             *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
 | |
| 
 | |
|             VKD3D_ASSERT(hlsl_reg.allocated);
 | |
| 
 | |
|             reg->type = sm4_get_semantic_register_type(version->type, ctx->is_patch_constant_func, var);
 | |
|             reg->dimension = VSIR_DIMENSION_VEC4;
 | |
|             reg->idx[is_primitive ? 1 : 0].offset = hlsl_reg.id;
 | |
|             reg->idx_count = is_primitive ? 2 : 1;
 | |
|             *writemask = hlsl_reg.writemask;
 | |
|         }
 | |
| 
 | |
|         if (is_primitive)
 | |
|         {
 | |
|             reg->idx[0].offset = deref->const_offset / 4;
 | |
|             if (deref->rel_offset.node)
 | |
|             {
 | |
|                 if (!(reg->idx[0].rel_addr = sm4_generate_vsir_new_idx_src(ctx, program, deref->rel_offset.node)))
 | |
|                     return false;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     else if (var->is_output_semantic)
 | |
|     {
 | |
|         bool has_idx;
 | |
| 
 | |
|         if (sm4_register_from_semantic_name(version, var->semantic.name, true, ®->type, &has_idx))
 | |
|         {
 | |
|             unsigned int offset = hlsl_offset_from_deref_safe(ctx, deref);
 | |
| 
 | |
|             if (has_idx)
 | |
|             {
 | |
|                 reg->idx[0].offset = var->semantic.index + offset / 4;
 | |
|                 reg->idx_count = 1;
 | |
|             }
 | |
| 
 | |
|             if (shader_sm4_is_scalar_register(reg))
 | |
|                 reg->dimension = VSIR_DIMENSION_SCALAR;
 | |
|             else
 | |
|                 reg->dimension = VSIR_DIMENSION_VEC4;
 | |
|             *writemask = ((1u << data_type->e.numeric.dimx) - 1) << (offset % 4);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             struct hlsl_reg hlsl_reg = hlsl_reg_from_deref(ctx, deref);
 | |
| 
 | |
|             VKD3D_ASSERT(hlsl_reg.allocated);
 | |
|             reg->type = VKD3DSPR_OUTPUT;
 | |
|             reg->dimension = VSIR_DIMENSION_VEC4;
 | |
|             reg->idx[0].offset = hlsl_reg.id;
 | |
|             reg->idx_count = 1;
 | |
|             *writemask = hlsl_reg.writemask;
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         return sm4_generate_vsir_numeric_reg_from_deref(ctx, program, reg, writemask, deref);
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct vkd3d_shader_src_param *src_param, const struct hlsl_deref *deref,
 | |
|         unsigned int dst_writemask, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     uint32_t writemask;
 | |
| 
 | |
|     if (!sm4_generate_vsir_reg_from_deref(ctx, program, &src_param->reg, &writemask, deref))
 | |
|         return false;
 | |
|     if (src_param->reg.dimension != VSIR_DIMENSION_NONE)
 | |
|         src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct vkd3d_shader_dst_param *dst_param, const struct hlsl_deref *deref,
 | |
|         const struct vkd3d_shader_location *loc, unsigned int writemask)
 | |
| {
 | |
|     uint32_t reg_writemask;
 | |
| 
 | |
|     if (!sm4_generate_vsir_reg_from_deref(ctx, program, &dst_param->reg, ®_writemask, deref))
 | |
|         return false;
 | |
|     dst_param->write_mask = hlsl_combine_writemasks(reg_writemask, writemask);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void vsir_dst_from_hlsl_node(struct vkd3d_shader_dst_param *dst,
 | |
|         struct hlsl_ctx *ctx, const struct hlsl_ir_node *instr)
 | |
| {
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
|     vsir_dst_param_init(dst, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, instr), 1);
 | |
|     dst->reg.idx[0].offset = instr->reg.id;
 | |
|     dst->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     dst->write_mask = instr->reg.writemask;
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_instr_constant(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_constant *constant)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &constant->node;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
|     VKD3D_ASSERT(constant->reg.allocated);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
 | |
|         return;
 | |
| 
 | |
|     src_param = &ins->src[0];
 | |
|     vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
 | |
|     src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     src_param->reg.idx[0].offset = constant->reg.id;
 | |
|     src_param->swizzle = generate_vsir_get_src_swizzle(constant->reg.writemask, instr->reg.writemask);
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_rasterizer_sample_count(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_expr *expr)
 | |
| {
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1)))
 | |
|         return;
 | |
|     ins->flags = VKD3DSI_SAMPLE_INFO_UINT;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     src_param = &ins->src[0];
 | |
|     vsir_src_param_init(src_param, VKD3DSPR_RASTERIZER, VKD3D_DATA_UNUSED, 0);
 | |
|     src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     src_param->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
 | |
| }
 | |
| 
 | |
| /* Translate ops that can be mapped to a single vsir instruction with only one dst register. */
 | |
| static void generate_vsir_instr_expr_single_instr_op(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode,
 | |
|         uint32_t src_mod, uint32_t dst_mod, bool map_src_swizzles)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     unsigned int i, src_count = 0;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
| 
 | |
|     for (i = 0; i < HLSL_MAX_OPERANDS; ++i)
 | |
|     {
 | |
|         if (expr->operands[i].node)
 | |
|             src_count = i + 1;
 | |
|     }
 | |
|     VKD3D_ASSERT(!src_mod || src_count == 1);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
 | |
|         return;
 | |
| 
 | |
|     dst_param = &ins->dst[0];
 | |
|     vsir_dst_from_hlsl_node(dst_param, ctx, instr);
 | |
|     dst_param->modifiers = dst_mod;
 | |
| 
 | |
|     for (i = 0; i < src_count; ++i)
 | |
|     {
 | |
|         struct hlsl_ir_node *operand = expr->operands[i].node;
 | |
| 
 | |
|         src_param = &ins->src[i];
 | |
|         vsir_src_from_hlsl_node(src_param, ctx, operand,
 | |
|                 map_src_swizzles ? dst_param->write_mask : VKD3DSP_WRITEMASK_ALL);
 | |
|         src_param->modifiers = src_mod;
 | |
|     }
 | |
| }
 | |
| 
 | |
| /* Translate ops that have 1 src and need one instruction for each component in
 | |
|  * the d3dbc backend. */
 | |
| static void sm1_generate_vsir_instr_expr_per_component_instr_op(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_expr *expr, enum vkd3d_shader_opcode opcode)
 | |
| {
 | |
|     struct hlsl_ir_node *operand = expr->operands[0].node;
 | |
|     struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     uint32_t src_swizzle;
 | |
|     unsigned int i, c;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
|     VKD3D_ASSERT(operand);
 | |
| 
 | |
|     src_swizzle = generate_vsir_get_src_swizzle(operand->reg.writemask, instr->reg.writemask);
 | |
|     for (i = 0; i < 4; ++i)
 | |
|     {
 | |
|         if (instr->reg.writemask & (1u << i))
 | |
|         {
 | |
|             if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1)))
 | |
|                 return;
 | |
| 
 | |
|             dst_param = &ins->dst[0];
 | |
|             vsir_register_init(&dst_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
 | |
|             dst_param->reg.idx[0].offset = instr->reg.id;
 | |
|             dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|             dst_param->write_mask = 1u << i;
 | |
| 
 | |
|             src_param = &ins->src[0];
 | |
|             vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, VKD3D_DATA_FLOAT, 1);
 | |
|             src_param->reg.idx[0].offset = operand->reg.id;
 | |
|             src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|             c = vsir_swizzle_get_component(src_swizzle, i);
 | |
|             src_param->swizzle = vsir_swizzle_from_writemask(1u << c);
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_instr_expr_sincos(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct hlsl_ir_expr *expr)
 | |
| {
 | |
|     struct hlsl_ir_node *operand = expr->operands[0].node;
 | |
|     struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     unsigned int src_count = 0;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
|     src_count = (ctx->profile->major_version < 3) ? 3 : 1;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SINCOS, 1, src_count)))
 | |
|         return;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     if (ctx->profile->major_version < 3)
 | |
|     {
 | |
|         src_param = &ins->src[1];
 | |
|         vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
 | |
|         src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|         src_param->reg.idx[0].offset = ctx->d3dsincosconst1.id;
 | |
|         src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
 | |
| 
 | |
|         src_param = &ins->src[2];
 | |
|         vsir_register_init(&src_param->reg, VKD3DSPR_CONST, VKD3D_DATA_FLOAT, 1);
 | |
|         src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|         src_param->reg.idx[0].offset = ctx->d3dsincosconst2.id;
 | |
|         src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool sm1_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_expr *expr)
 | |
| {
 | |
|     const struct hlsl_type *src_type, *dst_type;
 | |
|     const struct hlsl_ir_node *arg1, *instr;
 | |
| 
 | |
|     arg1 = expr->operands[0].node;
 | |
|     src_type = arg1->data_type;
 | |
|     instr = &expr->node;
 | |
|     dst_type = instr->data_type;
 | |
| 
 | |
|     /* Narrowing casts were already lowered. */
 | |
|     VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx);
 | |
| 
 | |
|     switch (dst_type->e.numeric.type)
 | |
|     {
 | |
|         case HLSL_TYPE_HALF:
 | |
|         case HLSL_TYPE_FLOAT:
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT:
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                     /* Integrals are internally represented as floats, so no change is necessary.*/
 | |
|                 case HLSL_TYPE_HALF:
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_DOUBLE:
 | |
|                     if (ctx->double_as_float_alias)
 | |
|                     {
 | |
|                         generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|                         return true;
 | |
|                     }
 | |
|                     hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                             "The 'double' type is not supported for the %s profile.", ctx->profile->name);
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_INT:
 | |
|         case HLSL_TYPE_MIN16UINT:
 | |
|         case HLSL_TYPE_UINT:
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_HALF:
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     /* A compilation pass turns these into FLOOR+REINTERPRET, so we should not
 | |
|                      * reach this case unless we are missing something. */
 | |
|                     hlsl_fixme(ctx, &instr->loc, "Unlowered SM1 cast from float to integer.");
 | |
|                     break;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT:
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_DOUBLE:
 | |
|                     hlsl_fixme(ctx, &instr->loc, "SM1 cast from double to integer.");
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_DOUBLE:
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|                     return true;
 | |
|                     break;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &instr->loc, "SM1 cast to double.");
 | |
|                     break;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_BOOL:
 | |
|             /* Casts to bool should have already been lowered. */
 | |
|             hlsl_fixme(ctx, &expr->node.loc, "SM1 cast from %s to %s.",
 | |
|                 debug_hlsl_type(ctx, src_type), debug_hlsl_type(ctx, dst_type));
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool sm1_generate_vsir_instr_expr(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct hlsl_ir_expr *expr)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct hlsl_type *type = instr->data_type;
 | |
| 
 | |
|     if (!hlsl_is_numeric_type(type))
 | |
|         goto err;
 | |
| 
 | |
|     if (type->e.numeric.type == HLSL_TYPE_DOUBLE && !ctx->double_as_float_alias)
 | |
|     {
 | |
|         hlsl_error(ctx, &instr->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                 "The 'double' type is not supported for the %s profile.", ctx->profile->name);
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     switch (expr->op)
 | |
|     {
 | |
|         case HLSL_OP1_ABS:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ABS, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_CAST:
 | |
|             return sm1_generate_vsir_instr_expr_cast(ctx, program, expr);
 | |
| 
 | |
|         case HLSL_OP1_COS_REDUCED:
 | |
|             VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_0);
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             sm1_generate_vsir_instr_expr_sincos(ctx, program, expr);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_DSX:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_DSY:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_EXP2:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_EXP);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_LOG2:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_LOG);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_NEG:
 | |
|             if (type->e.numeric.type == HLSL_TYPE_BOOL)
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_RCP:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RCP);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_REINTERPRET:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_RSQ:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             sm1_generate_vsir_instr_expr_per_component_instr_op(ctx, program, expr, VKD3DSIH_RSQ);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_SAT:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_SIN_REDUCED:
 | |
|             VKD3D_ASSERT(expr->node.reg.writemask == VKD3DSP_WRITEMASK_1);
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             sm1_generate_vsir_instr_expr_sincos(ctx, program, expr);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_ADD:
 | |
|             if (type->e.numeric.type == HLSL_TYPE_BOOL)
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_DOT:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             switch (expr->operands[0].node->data_type->e.numeric.dimx)
 | |
|             {
 | |
|                 case 3:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
 | |
|                     break;
 | |
| 
 | |
|                 case 4:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
 | |
|                     break;
 | |
| 
 | |
|                 default:
 | |
|                     vkd3d_unreachable();
 | |
|                     return false;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_MAX:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_MIN:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_MUL:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP1_FRACT:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_LOGIC_AND:
 | |
|             if (type->e.numeric.type != HLSL_TYPE_BOOL)
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_LOGIC_OR:
 | |
|             if (type->e.numeric.type != HLSL_TYPE_BOOL)
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP2_SLT:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SLT, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP3_CMP:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_CMP, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP3_DP2ADD:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2ADD, 0, 0, false);
 | |
|             break;
 | |
| 
 | |
|         case HLSL_OP3_MAD:
 | |
|             if (!hlsl_type_is_floating_point(type))
 | |
|                 goto err;
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true);
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             goto err;
 | |
|     }
 | |
|     return true;
 | |
| 
 | |
| err:
 | |
|     hlsl_fixme(ctx, &instr->loc, "SM1 %s expression of type %s.", debug_hlsl_expr_op(expr->op), instr->data_type->name);
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_init_dst_param_from_deref(struct hlsl_ctx *ctx,
 | |
|         struct vkd3d_shader_dst_param *dst_param, struct hlsl_deref *deref,
 | |
|         const struct vkd3d_shader_location *loc, unsigned int writemask)
 | |
| {
 | |
|     enum vkd3d_shader_register_type type = VKD3DSPR_TEMP;
 | |
|     struct vkd3d_shader_version version;
 | |
|     uint32_t register_index;
 | |
|     struct hlsl_reg reg;
 | |
| 
 | |
|     reg = hlsl_reg_from_deref(ctx, deref);
 | |
|     register_index = reg.id;
 | |
|     writemask = hlsl_combine_writemasks(reg.writemask, writemask);
 | |
| 
 | |
|     if (deref->var->is_output_semantic)
 | |
|     {
 | |
|         const char *semantic_name = deref->var->semantic.name;
 | |
| 
 | |
|         version.major = ctx->profile->major_version;
 | |
|         version.minor = ctx->profile->minor_version;
 | |
|         version.type = ctx->profile->type;
 | |
| 
 | |
|         if (version.type == VKD3D_SHADER_TYPE_PIXEL && version.major == 1)
 | |
|         {
 | |
|             type = VKD3DSPR_TEMP;
 | |
|             register_index = 0;
 | |
|         }
 | |
|         else if (!sm1_register_from_semantic_name(&version, semantic_name,
 | |
|                 deref->var->semantic.index, true, NULL, &type, ®ister_index))
 | |
|         {
 | |
|             VKD3D_ASSERT(reg.allocated);
 | |
|             type = VKD3DSPR_OUTPUT;
 | |
|             register_index = reg.id;
 | |
|         }
 | |
|         else
 | |
|             writemask = (1u << deref->var->data_type->e.numeric.dimx) - 1;
 | |
| 
 | |
|         if (version.type == VKD3D_SHADER_TYPE_PIXEL && (!ascii_strcasecmp(semantic_name, "PSIZE")
 | |
|                 || (!ascii_strcasecmp(semantic_name, "FOG") && version.major < 3)))
 | |
|         {
 | |
|             /* These are always 1-component, but for some reason are written
 | |
|              * with a writemask containing all components. */
 | |
|             writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|         VKD3D_ASSERT(reg.allocated);
 | |
| 
 | |
|     if (type == VKD3DSPR_DEPTHOUT)
 | |
|     {
 | |
|         vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0);
 | |
|         dst_param->reg.dimension = VSIR_DIMENSION_SCALAR;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
 | |
|         dst_param->reg.idx[0].offset = register_index;
 | |
|         dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     }
 | |
|     dst_param->write_mask = writemask;
 | |
| 
 | |
|     if (deref->rel_offset.node)
 | |
|         hlsl_fixme(ctx, loc, "Translate relative addressing on dst register for vsir.");
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_instr_mova(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_node *instr)
 | |
| {
 | |
|     enum vkd3d_shader_opcode opcode = hlsl_version_ge(ctx, 2, 0) ? VKD3DSIH_MOVA : VKD3DSIH_MOV;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 1)))
 | |
|         return;
 | |
| 
 | |
|     dst_param = &ins->dst[0];
 | |
|     vsir_register_init(&dst_param->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0);
 | |
|     dst_param->write_mask = VKD3DSP_WRITEMASK_0;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->data_type->class <= HLSL_CLASS_VECTOR);
 | |
|     VKD3D_ASSERT(instr->data_type->e.numeric.dimx == 1);
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, instr, VKD3DSP_WRITEMASK_ALL);
 | |
| }
 | |
| 
 | |
| static struct vkd3d_shader_src_param *sm1_generate_vsir_new_address_src(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program)
 | |
| {
 | |
|     struct vkd3d_shader_src_param *idx_src;
 | |
| 
 | |
|     if (!(idx_src = vsir_program_get_src_params(program, 1)))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return NULL;
 | |
|     }
 | |
| 
 | |
|     memset(idx_src, 0, sizeof(*idx_src));
 | |
|     vsir_register_init(&idx_src->reg, VKD3DSPR_ADDR, VKD3D_DATA_FLOAT, 0);
 | |
|     idx_src->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     idx_src->swizzle = VKD3D_SHADER_SWIZZLE(X, X, X, X);
 | |
|     return idx_src;
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_init_src_param_from_deref(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct vkd3d_shader_src_param *src_param,
 | |
|         struct hlsl_deref *deref, uint32_t dst_writemask, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     enum vkd3d_shader_register_type type = VKD3DSPR_TEMP;
 | |
|     struct vkd3d_shader_src_param *src_rel_addr = NULL;
 | |
|     struct vkd3d_shader_version version;
 | |
|     uint32_t register_index;
 | |
|     unsigned int writemask;
 | |
|     struct hlsl_reg reg;
 | |
| 
 | |
|     if (hlsl_type_is_resource(deref->var->data_type))
 | |
|     {
 | |
|         unsigned int sampler_offset;
 | |
| 
 | |
|         type = VKD3DSPR_COMBINED_SAMPLER;
 | |
| 
 | |
|         sampler_offset = hlsl_offset_from_deref_safe(ctx, deref);
 | |
|         register_index = deref->var->regs[HLSL_REGSET_SAMPLERS].index + sampler_offset;
 | |
|         writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|     }
 | |
|     else if (deref->var->is_uniform)
 | |
|     {
 | |
|         unsigned int offset = deref->const_offset;
 | |
| 
 | |
|         type = VKD3DSPR_CONST;
 | |
|         register_index = deref->var->regs[HLSL_REGSET_NUMERIC].id + offset / 4;
 | |
| 
 | |
|         writemask = 0xf & (0xf << (offset % 4));
 | |
|         if (deref->var->regs[HLSL_REGSET_NUMERIC].writemask)
 | |
|             writemask = hlsl_combine_writemasks(deref->var->regs[HLSL_REGSET_NUMERIC].writemask, writemask);
 | |
| 
 | |
|         if (deref->rel_offset.node)
 | |
|         {
 | |
|             VKD3D_ASSERT(deref_supports_sm1_indirect_addressing(ctx, deref));
 | |
| 
 | |
|             if (!(src_rel_addr = sm1_generate_vsir_new_address_src(ctx, program)))
 | |
|             {
 | |
|                 ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|                 return;
 | |
|             }
 | |
|         }
 | |
|         VKD3D_ASSERT(deref->var->regs[HLSL_REGSET_NUMERIC].allocated);
 | |
|     }
 | |
|     else if (deref->var->is_input_semantic)
 | |
|     {
 | |
|         version.major = ctx->profile->major_version;
 | |
|         version.minor = ctx->profile->minor_version;
 | |
|         version.type = ctx->profile->type;
 | |
|         if (sm1_register_from_semantic_name(&version, deref->var->semantic.name,
 | |
|                 deref->var->semantic.index, false, NULL, &type, ®ister_index))
 | |
|         {
 | |
|             writemask = (1 << deref->var->data_type->e.numeric.dimx) - 1;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             type = VKD3DSPR_INPUT;
 | |
| 
 | |
|             reg = hlsl_reg_from_deref(ctx, deref);
 | |
|             register_index = reg.id;
 | |
|             writemask = reg.writemask;
 | |
|             VKD3D_ASSERT(reg.allocated);
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         type = VKD3DSPR_TEMP;
 | |
| 
 | |
|         reg = hlsl_reg_from_deref(ctx, deref);
 | |
|         register_index = reg.id;
 | |
|         writemask = reg.writemask;
 | |
|     }
 | |
| 
 | |
|     vsir_register_init(&src_param->reg, type, VKD3D_DATA_FLOAT, 1);
 | |
|     src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     src_param->reg.idx[0].offset = register_index;
 | |
|     src_param->reg.idx[0].rel_addr = src_rel_addr;
 | |
|     src_param->swizzle = generate_vsir_get_src_swizzle(writemask, dst_writemask);
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct hlsl_ir_load *load)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &load->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
| 
 | |
|     if (load->src.rel_offset.node)
 | |
|         sm1_generate_vsir_instr_mova(ctx, program, load->src.rel_offset.node);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
 | |
|         return;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[0],
 | |
|             &load->src, ins->dst[0].write_mask, &ins->location);
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_resource_load *load)
 | |
| {
 | |
|     struct hlsl_ir_node *coords = load->coords.node;
 | |
|     struct hlsl_ir_node *ddx = load->ddx.node;
 | |
|     struct hlsl_ir_node *ddy = load->ddy.node;
 | |
|     struct hlsl_ir_node *instr = &load->node;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     enum vkd3d_shader_opcode opcode;
 | |
|     unsigned int src_count = 2;
 | |
|     uint32_t flags = 0;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
| 
 | |
|     switch (load->load_type)
 | |
|     {
 | |
|         case HLSL_RESOURCE_SAMPLE:
 | |
|             opcode = VKD3DSIH_TEX;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_PROJ:
 | |
|             opcode = VKD3DSIH_TEX;
 | |
|             flags |= VKD3DSI_TEXLD_PROJECT;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
 | |
|             opcode = VKD3DSIH_TEX;
 | |
|             flags |= VKD3DSI_TEXLD_BIAS;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_GRAD:
 | |
|             opcode = VKD3DSIH_TEXLDD;
 | |
|             src_count += 2;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             hlsl_fixme(ctx, &instr->loc, "Resource load type %u.", load->load_type);
 | |
|             return;
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
 | |
|         return;
 | |
|     ins->flags = flags;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     src_param = &ins->src[0];
 | |
|     vsir_src_from_hlsl_node(src_param, ctx, coords, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     sm1_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1], &load->resource,
 | |
|             VKD3DSP_WRITEMASK_ALL, &ins->location);
 | |
| 
 | |
|     if (load->load_type == HLSL_RESOURCE_SAMPLE_GRAD)
 | |
|     {
 | |
|         src_param = &ins->src[2];
 | |
|         vsir_src_from_hlsl_node(src_param, ctx, ddx, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|         src_param = &ins->src[3];
 | |
|         vsir_src_from_hlsl_node(src_param, ctx, ddy, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void generate_vsir_instr_swizzle(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_swizzle *swizzle_instr)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &swizzle_instr->node, *val = swizzle_instr->val.node;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     uint32_t swizzle;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
 | |
|         return;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     swizzle = hlsl_swizzle_from_writemask(val->reg.writemask);
 | |
|     swizzle = hlsl_combine_swizzles(swizzle, swizzle_instr->u.vector, instr->data_type->e.numeric.dimx);
 | |
|     swizzle = hlsl_map_swizzle(swizzle, ins->dst[0].write_mask);
 | |
| 
 | |
|     src_param = &ins->src[0];
 | |
|     VKD3D_ASSERT(val->type != HLSL_IR_CONSTANT);
 | |
|     vsir_register_init(&src_param->reg, VKD3DSPR_TEMP, vsir_data_type_from_hlsl_instruction(ctx, val), 1);
 | |
|     src_param->reg.idx[0].offset = val->reg.id;
 | |
|     src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     src_param->swizzle = swizzle;
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_instr_store(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         struct hlsl_ir_store *store)
 | |
| {
 | |
|     struct hlsl_ir_node *rhs = store->rhs.node;
 | |
|     struct hlsl_ir_node *instr = &store->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
 | |
|         return;
 | |
| 
 | |
|     sm1_generate_vsir_init_dst_param_from_deref(ctx, &ins->dst[0], &store->lhs, &ins->location, store->writemask);
 | |
| 
 | |
|     src_param = &ins->src[0];
 | |
|     vsir_src_from_hlsl_node(src_param, ctx, rhs, ins->dst[0].write_mask);
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_jump *jump)
 | |
| {
 | |
|     struct hlsl_ir_node *condition = jump->condition.node;
 | |
|     struct hlsl_ir_node *instr = &jump->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (jump->type == HLSL_IR_JUMP_DISCARD_NEG)
 | |
|     {
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_TEXKILL, 0, 1)))
 | |
|             return;
 | |
| 
 | |
|         vsir_src_from_hlsl_node(&ins->src[0], ctx, condition, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program);
 | |
| 
 | |
| static void sm1_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff)
 | |
| {
 | |
|     struct hlsl_ir_node *condition = iff->condition.node;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct hlsl_ir_node *instr = &iff->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (hlsl_version_lt(ctx, 2, 1))
 | |
|     {
 | |
|         hlsl_fixme(ctx, &instr->loc, "Flatten \"if\" conditionals branches.");
 | |
|         return;
 | |
|     }
 | |
|     VKD3D_ASSERT(condition->data_type->e.numeric.dimx == 1 && condition->data_type->e.numeric.dimy == 1);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IFC, 0, 2)))
 | |
|         return;
 | |
|     ins->flags = VKD3D_SHADER_REL_OP_NE;
 | |
| 
 | |
|     src_param = &ins->src[0];
 | |
|     vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL);
 | |
|     src_param->modifiers = 0;
 | |
| 
 | |
|     src_param = &ins->src[1];
 | |
|     vsir_src_from_hlsl_node(src_param, ctx, condition, VKD3DSP_WRITEMASK_ALL);
 | |
|     src_param->modifiers = VKD3DSPSM_NEG;
 | |
| 
 | |
|     sm1_generate_vsir_block(ctx, &iff->then_block, program);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0)))
 | |
|         return;
 | |
| 
 | |
|     sm1_generate_vsir_block(ctx, &iff->else_block, program);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0)))
 | |
|         return;
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
 | |
| {
 | |
|     struct hlsl_ir_node *instr, *next;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->data_type)
 | |
|         {
 | |
|             if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
 | |
|             {
 | |
|                 hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         switch (instr->type)
 | |
|         {
 | |
|             case HLSL_IR_CALL:
 | |
|                 vkd3d_unreachable();
 | |
| 
 | |
|             case HLSL_IR_CONSTANT:
 | |
|                 sm1_generate_vsir_instr_constant(ctx, program, hlsl_ir_constant(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_EXPR:
 | |
|                 sm1_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_IF:
 | |
|                 sm1_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_JUMP:
 | |
|                 sm1_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_LOAD:
 | |
|                 sm1_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_RESOURCE_LOAD:
 | |
|                 sm1_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_STORE:
 | |
|                 sm1_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_SWIZZLE:
 | |
|                 generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 hlsl_fixme(ctx, &instr->loc, "Instruction type %s.", hlsl_node_type_to_string(instr->type));
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm1_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
 | |
|         uint64_t config_flags, struct vsir_program *program)
 | |
| {
 | |
|     struct vkd3d_shader_version version = {0};
 | |
|     struct hlsl_block block;
 | |
| 
 | |
|     version.major = ctx->profile->major_version;
 | |
|     version.minor = ctx->profile->minor_version;
 | |
|     version.type = ctx->profile->type;
 | |
|     if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     program->temp_count = allocate_temp_registers(ctx, entry_func);
 | |
|     if (ctx->result)
 | |
|         return;
 | |
| 
 | |
|     generate_vsir_signature(ctx, program, entry_func);
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
|     sm1_generate_vsir_constant_defs(ctx, program, &block);
 | |
|     sm1_generate_vsir_sampler_dcls(ctx, program, &block);
 | |
|     list_move_head(&entry_func->body.instrs, &block.instrs);
 | |
| 
 | |
|     sm1_generate_vsir_block(ctx, &entry_func->body, program);
 | |
| }
 | |
| 
 | |
| D3DXPARAMETER_CLASS hlsl_sm1_class(const struct hlsl_type *type)
 | |
| {
 | |
|     switch (type->class)
 | |
|     {
 | |
|         case HLSL_CLASS_ARRAY:
 | |
|             return hlsl_sm1_class(type->e.array.type);
 | |
|         case HLSL_CLASS_MATRIX:
 | |
|             VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
 | |
|             if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
 | |
|                 return D3DXPC_MATRIX_COLUMNS;
 | |
|             else
 | |
|                 return D3DXPC_MATRIX_ROWS;
 | |
|         case HLSL_CLASS_SCALAR:
 | |
|             return D3DXPC_SCALAR;
 | |
|         case HLSL_CLASS_STRUCT:
 | |
|             return D3DXPC_STRUCT;
 | |
|         case HLSL_CLASS_VECTOR:
 | |
|             return D3DXPC_VECTOR;
 | |
|         case HLSL_CLASS_PIXEL_SHADER:
 | |
|         case HLSL_CLASS_SAMPLER:
 | |
|         case HLSL_CLASS_STRING:
 | |
|         case HLSL_CLASS_TEXTURE:
 | |
|         case HLSL_CLASS_VERTEX_SHADER:
 | |
|             return D3DXPC_OBJECT;
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_STATE:
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_VIEW:
 | |
|         case HLSL_CLASS_EFFECT_GROUP:
 | |
|         case HLSL_CLASS_ERROR:
 | |
|         case HLSL_CLASS_PASS:
 | |
|         case HLSL_CLASS_RASTERIZER_STATE:
 | |
|         case HLSL_CLASS_RENDER_TARGET_VIEW:
 | |
|         case HLSL_CLASS_TECHNIQUE:
 | |
|         case HLSL_CLASS_UAV:
 | |
|         case HLSL_CLASS_VOID:
 | |
|         case HLSL_CLASS_CONSTANT_BUFFER:
 | |
|         case HLSL_CLASS_COMPUTE_SHADER:
 | |
|         case HLSL_CLASS_DOMAIN_SHADER:
 | |
|         case HLSL_CLASS_HULL_SHADER:
 | |
|         case HLSL_CLASS_GEOMETRY_SHADER:
 | |
|         case HLSL_CLASS_BLEND_STATE:
 | |
|         case HLSL_CLASS_STREAM_OUTPUT:
 | |
|         case HLSL_CLASS_NULL:
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| D3DXPARAMETER_TYPE hlsl_sm1_base_type(const struct hlsl_type *type, bool is_combined_sampler)
 | |
| {
 | |
|     enum hlsl_type_class class = type->class;
 | |
| 
 | |
|     if (is_combined_sampler)
 | |
|         class = HLSL_CLASS_TEXTURE;
 | |
| 
 | |
|     switch (class)
 | |
|     {
 | |
|         case HLSL_CLASS_SCALAR:
 | |
|         case HLSL_CLASS_VECTOR:
 | |
|         case HLSL_CLASS_MATRIX:
 | |
|             switch (type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                     return D3DXPT_BOOL;
 | |
|                 /* Actually double behaves differently depending on DLL version:
 | |
|                  * For <= 36, it maps to D3DXPT_FLOAT.
 | |
|                  * For 37-40, it maps to zero (D3DXPT_VOID).
 | |
|                  * For >= 41, it maps to 39, which is D3D_SVT_DOUBLE (note D3D_SVT_*
 | |
|                  * values are mostly compatible with D3DXPT_*).
 | |
|                  * However, the latter two cases look like bugs, and a reasonable
 | |
|                  * application certainly wouldn't know what to do with them.
 | |
|                  * For fx_2_0 it's always D3DXPT_FLOAT regardless of DLL version. */
 | |
|                 case HLSL_TYPE_DOUBLE:
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                 case HLSL_TYPE_HALF:
 | |
|                     return D3DXPT_FLOAT;
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     return D3DXPT_INT;
 | |
|                 /* Minimum-precision types are not supported until 46, but at
 | |
|                  * that point they do the same thing, and return sm4 types. */
 | |
|                 case HLSL_TYPE_MIN16UINT:
 | |
|                     return 0x39;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_SAMPLER:
 | |
|             switch (type->sampler_dim)
 | |
|             {
 | |
|                 case HLSL_SAMPLER_DIM_1D:
 | |
|                     return D3DXPT_SAMPLER1D;
 | |
|                 case HLSL_SAMPLER_DIM_2D:
 | |
|                     return D3DXPT_SAMPLER2D;
 | |
|                 case HLSL_SAMPLER_DIM_3D:
 | |
|                     return D3DXPT_SAMPLER3D;
 | |
|                 case HLSL_SAMPLER_DIM_CUBE:
 | |
|                     return D3DXPT_SAMPLERCUBE;
 | |
|                 case HLSL_SAMPLER_DIM_GENERIC:
 | |
|                     return D3DXPT_SAMPLER;
 | |
|                 default:
 | |
|                     ERR("Invalid dimension %#x.\n", type->sampler_dim);
 | |
|                     vkd3d_unreachable();
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_TEXTURE:
 | |
|             switch (type->sampler_dim)
 | |
|             {
 | |
|                 case HLSL_SAMPLER_DIM_1D:
 | |
|                     return D3DXPT_TEXTURE1D;
 | |
|                 case HLSL_SAMPLER_DIM_2D:
 | |
|                     return D3DXPT_TEXTURE2D;
 | |
|                 case HLSL_SAMPLER_DIM_3D:
 | |
|                     return D3DXPT_TEXTURE3D;
 | |
|                 case HLSL_SAMPLER_DIM_CUBE:
 | |
|                     return D3DXPT_TEXTURECUBE;
 | |
|                 case HLSL_SAMPLER_DIM_GENERIC:
 | |
|                     return D3DXPT_TEXTURE;
 | |
|                 default:
 | |
|                     ERR("Invalid dimension %#x.\n", type->sampler_dim);
 | |
|                     vkd3d_unreachable();
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_CLASS_ARRAY:
 | |
|             return hlsl_sm1_base_type(type->e.array.type, is_combined_sampler);
 | |
| 
 | |
|         case HLSL_CLASS_STRUCT:
 | |
|             return D3DXPT_VOID;
 | |
| 
 | |
|         case HLSL_CLASS_STRING:
 | |
|             return D3DXPT_STRING;
 | |
| 
 | |
|         case HLSL_CLASS_PIXEL_SHADER:
 | |
|             return D3DXPT_PIXELSHADER;
 | |
| 
 | |
|         case HLSL_CLASS_VERTEX_SHADER:
 | |
|             return D3DXPT_VERTEXSHADER;
 | |
| 
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_STATE:
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_VIEW:
 | |
|         case HLSL_CLASS_EFFECT_GROUP:
 | |
|         case HLSL_CLASS_ERROR:
 | |
|         case HLSL_CLASS_PASS:
 | |
|         case HLSL_CLASS_RASTERIZER_STATE:
 | |
|         case HLSL_CLASS_RENDER_TARGET_VIEW:
 | |
|         case HLSL_CLASS_TECHNIQUE:
 | |
|         case HLSL_CLASS_UAV:
 | |
|         case HLSL_CLASS_VOID:
 | |
|         case HLSL_CLASS_CONSTANT_BUFFER:
 | |
|         case HLSL_CLASS_COMPUTE_SHADER:
 | |
|         case HLSL_CLASS_DOMAIN_SHADER:
 | |
|         case HLSL_CLASS_HULL_SHADER:
 | |
|         case HLSL_CLASS_GEOMETRY_SHADER:
 | |
|         case HLSL_CLASS_BLEND_STATE:
 | |
|         case HLSL_CLASS_STREAM_OUTPUT:
 | |
|         case HLSL_CLASS_NULL:
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static void write_sm1_type(struct vkd3d_bytecode_buffer *buffer,
 | |
|         struct hlsl_type *type, bool is_combined_sampler, unsigned int ctab_start)
 | |
| {
 | |
|     const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
 | |
|     unsigned int array_size = hlsl_get_multiarray_size(type);
 | |
|     struct hlsl_struct_field *field;
 | |
|     size_t i;
 | |
| 
 | |
|     if (type->bytecode_offset)
 | |
|         return;
 | |
| 
 | |
|     if (array_type->class == HLSL_CLASS_STRUCT)
 | |
|     {
 | |
|         unsigned int field_count = array_type->e.record.field_count;
 | |
|         size_t fields_offset;
 | |
| 
 | |
|         for (i = 0; i < field_count; ++i)
 | |
|         {
 | |
|             field = &array_type->e.record.fields[i];
 | |
|             field->name_bytecode_offset = put_string(buffer, field->name);
 | |
|             write_sm1_type(buffer, field->type, false, ctab_start);
 | |
|         }
 | |
| 
 | |
|         fields_offset = bytecode_align(buffer) - ctab_start;
 | |
| 
 | |
|         for (i = 0; i < field_count; ++i)
 | |
|         {
 | |
|             field = &array_type->e.record.fields[i];
 | |
|             put_u32(buffer, field->name_bytecode_offset - ctab_start);
 | |
|             put_u32(buffer, field->type->bytecode_offset - ctab_start);
 | |
|         }
 | |
| 
 | |
|         type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3DXPC_STRUCT, D3DXPT_VOID));
 | |
|         put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type)));
 | |
|         put_u32(buffer, vkd3d_make_u32(array_size, field_count));
 | |
|         put_u32(buffer, fields_offset);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         type->bytecode_offset = put_u32(buffer,
 | |
|                 vkd3d_make_u32(hlsl_sm1_class(type), hlsl_sm1_base_type(array_type, is_combined_sampler)));
 | |
|         if (hlsl_is_numeric_type(array_type))
 | |
|             put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx));
 | |
|         else
 | |
|             put_u32(buffer, vkd3d_make_u32(1, 1));
 | |
|         put_u32(buffer, vkd3d_make_u32(array_size, 0));
 | |
|         put_u32(buffer, 1);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm1_sort_extern(struct list *sorted, struct hlsl_ir_var *to_sort)
 | |
| {
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     list_remove(&to_sort->extern_entry);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, sorted, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (strcmp(to_sort->name, var->name) < 0)
 | |
|         {
 | |
|             list_add_before(&var->extern_entry, &to_sort->extern_entry);
 | |
|             return;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     list_add_tail(sorted, &to_sort->extern_entry);
 | |
| }
 | |
| 
 | |
| static void sm1_sort_externs(struct hlsl_ctx *ctx)
 | |
| {
 | |
|     struct list sorted = LIST_INIT(sorted);
 | |
|     struct hlsl_ir_var *var, *next;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(var, next, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (var->is_uniform)
 | |
|             sm1_sort_extern(&sorted, var);
 | |
|     }
 | |
|     list_move_tail(&ctx->extern_vars, &sorted);
 | |
| }
 | |
| 
 | |
| static void write_sm1_uniforms(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer)
 | |
| {
 | |
|     size_t ctab_start, vars_offset, vars_start, creator_offset, offset;
 | |
|     unsigned int uniform_count = 0, r;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         for (r = 0; r <= HLSL_REGSET_LAST; ++r)
 | |
|         {
 | |
|             if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
 | |
|                 continue;
 | |
| 
 | |
|             ++uniform_count;
 | |
| 
 | |
|             if (var->is_param && var->is_uniform)
 | |
|             {
 | |
|                 char *new_name;
 | |
| 
 | |
|                 if (!(new_name = hlsl_sprintf_alloc(ctx, "$%s", var->name)))
 | |
|                     return;
 | |
|                 vkd3d_free((char *)var->name);
 | |
|                 var->name = new_name;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     sm1_sort_externs(ctx);
 | |
| 
 | |
|     ctab_start = put_u32(buffer, 7 * sizeof(uint32_t)); /* CTAB header size. */
 | |
|     creator_offset = put_u32(buffer, 0);
 | |
|     if (ctx->profile->type == VKD3D_SHADER_TYPE_VERTEX)
 | |
|         put_u32(buffer, D3DVS_VERSION(ctx->profile->major_version, ctx->profile->minor_version));
 | |
|     else
 | |
|         put_u32(buffer, D3DPS_VERSION(ctx->profile->major_version, ctx->profile->minor_version));
 | |
|     put_u32(buffer, uniform_count);
 | |
|     vars_offset = put_u32(buffer, 0);
 | |
|     put_u32(buffer, 0); /* FIXME: flags */
 | |
|     put_u32(buffer, 0); /* FIXME: target string */
 | |
| 
 | |
|     vars_start = bytecode_align(buffer);
 | |
|     set_u32(buffer, vars_offset, vars_start - ctab_start);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         for (r = 0; r <= HLSL_REGSET_LAST; ++r)
 | |
|         {
 | |
|             if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
 | |
|                 continue;
 | |
| 
 | |
|             put_u32(buffer, 0); /* name */
 | |
|             if (r == HLSL_REGSET_NUMERIC)
 | |
|             {
 | |
|                 put_u32(buffer, vkd3d_make_u32(D3DXRS_FLOAT4, var->regs[r].id));
 | |
|                 put_u32(buffer, var->bind_count[r]);
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 put_u32(buffer, vkd3d_make_u32(D3DXRS_SAMPLER, var->regs[r].index));
 | |
|                 put_u32(buffer, var->bind_count[r]);
 | |
|             }
 | |
|             put_u32(buffer, 0); /* type */
 | |
|             put_u32(buffer, 0); /* default value */
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     uniform_count = 0;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         for (r = 0; r <= HLSL_REGSET_LAST; ++r)
 | |
|         {
 | |
|             size_t var_offset, name_offset;
 | |
| 
 | |
|             if (var->semantic.name || !var->regs[r].allocated || !var->last_read)
 | |
|                 continue;
 | |
| 
 | |
|             var_offset = vars_start + (uniform_count * 5 * sizeof(uint32_t));
 | |
| 
 | |
|             name_offset = put_string(buffer, var->name);
 | |
|             set_u32(buffer, var_offset, name_offset - ctab_start);
 | |
| 
 | |
|             write_sm1_type(buffer, var->data_type, var->is_combined_sampler, ctab_start);
 | |
|             set_u32(buffer, var_offset + 3 * sizeof(uint32_t), var->data_type->bytecode_offset - ctab_start);
 | |
| 
 | |
|             if (var->default_values)
 | |
|             {
 | |
|                 unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
 | |
|                 unsigned int comp_count = hlsl_type_component_count(var->data_type);
 | |
|                 unsigned int default_value_offset;
 | |
|                 unsigned int k;
 | |
| 
 | |
|                 default_value_offset = bytecode_reserve_bytes(buffer, reg_size * sizeof(uint32_t));
 | |
|                 set_u32(buffer, var_offset + 4 * sizeof(uint32_t), default_value_offset - ctab_start);
 | |
| 
 | |
|                 for (k = 0; k < comp_count; ++k)
 | |
|                 {
 | |
|                     struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k);
 | |
|                     unsigned int comp_offset;
 | |
|                     enum hlsl_regset regset;
 | |
| 
 | |
|                     comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set);
 | |
|                     if (regset == HLSL_REGSET_NUMERIC)
 | |
|                     {
 | |
|                         union
 | |
|                         {
 | |
|                             uint32_t u;
 | |
|                             float f;
 | |
|                         } uni = {0};
 | |
| 
 | |
|                         switch (comp_type->e.numeric.type)
 | |
|                         {
 | |
|                             case HLSL_TYPE_DOUBLE:
 | |
|                                 if (ctx->double_as_float_alias)
 | |
|                                     uni.u = var->default_values[k].number.u;
 | |
|                                 else
 | |
|                                     uni.u = 0;
 | |
|                                 break;
 | |
| 
 | |
|                             case HLSL_TYPE_INT:
 | |
|                                 uni.f = var->default_values[k].number.i;
 | |
|                                 break;
 | |
| 
 | |
|                             case HLSL_TYPE_MIN16UINT:
 | |
|                             case HLSL_TYPE_UINT:
 | |
|                             case HLSL_TYPE_BOOL:
 | |
|                                 uni.f = var->default_values[k].number.u;
 | |
|                                 break;
 | |
| 
 | |
|                             case HLSL_TYPE_HALF:
 | |
|                             case HLSL_TYPE_FLOAT:
 | |
|                                 uni.u = var->default_values[k].number.u;
 | |
|                                 break;
 | |
|                         }
 | |
| 
 | |
|                         set_u32(buffer, default_value_offset + comp_offset * sizeof(uint32_t), uni.u);
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             ++uniform_count;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     offset = put_string(buffer, vkd3d_shader_get_version(NULL, NULL));
 | |
|     set_u32(buffer, creator_offset, offset - ctab_start);
 | |
| }
 | |
| 
 | |
| static void sm1_generate_ctab(struct hlsl_ctx *ctx, struct vkd3d_shader_code *ctab)
 | |
| {
 | |
|     struct vkd3d_bytecode_buffer buffer = {0};
 | |
| 
 | |
|     write_sm1_uniforms(ctx, &buffer);
 | |
|     if (buffer.status)
 | |
|     {
 | |
|         vkd3d_free(buffer.data);
 | |
|         ctx->result = buffer.status;
 | |
|         return;
 | |
|     }
 | |
|     ctab->code = buffer.data;
 | |
|     ctab->size = buffer.size;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_instr_dcl_semantic(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         const struct hlsl_ir_var *var, struct hlsl_block *block, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     const struct vkd3d_shader_version *version = &program->shader_version;
 | |
|     const bool is_primitive = hlsl_type_is_primitive_array(var->data_type);
 | |
|     const bool output = var->is_output_semantic;
 | |
|     enum vkd3d_shader_sysval_semantic semantic;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     enum vkd3d_shader_register_type type;
 | |
|     enum vkd3d_shader_opcode opcode;
 | |
|     unsigned int idx = 0;
 | |
|     uint32_t write_mask;
 | |
|     bool has_idx;
 | |
| 
 | |
|     sm4_sysval_semantic_from_semantic_name(&semantic, version, ctx->semantic_compat_mapping, ctx->domain,
 | |
|             var->semantic.name, var->semantic.index, output, ctx->is_patch_constant_func, is_primitive);
 | |
|     if (semantic == ~0u)
 | |
|         semantic = VKD3D_SHADER_SV_NONE;
 | |
| 
 | |
|     if (var->is_input_semantic)
 | |
|     {
 | |
|         switch (semantic)
 | |
|         {
 | |
|             case VKD3D_SHADER_SV_NONE:
 | |
|                 opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|                         ? VKD3DSIH_DCL_INPUT_PS : VKD3DSIH_DCL_INPUT;
 | |
|                 break;
 | |
| 
 | |
|             case VKD3D_SHADER_SV_PRIMITIVE_ID:
 | |
|                 if (version->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|                     opcode = VKD3DSIH_DCL_INPUT_PS_SGV;
 | |
|                 else if (version->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|                     opcode = VKD3DSIH_DCL_INPUT;
 | |
|                 else
 | |
|                     opcode = VKD3DSIH_DCL_INPUT_SGV;
 | |
|                 break;
 | |
| 
 | |
|             case VKD3D_SHADER_SV_INSTANCE_ID:
 | |
|             case VKD3D_SHADER_SV_IS_FRONT_FACE:
 | |
|             case VKD3D_SHADER_SV_SAMPLE_INDEX:
 | |
|             case VKD3D_SHADER_SV_VERTEX_ID:
 | |
|                 opcode = (version->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|                         ? VKD3DSIH_DCL_INPUT_PS_SGV : VKD3DSIH_DCL_INPUT_SGV;
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 if (version->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|                     opcode = VKD3DSIH_DCL_INPUT_PS_SIV;
 | |
|                 else if (is_primitive && version->type != VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|                     opcode = VKD3DSIH_DCL_INPUT;
 | |
|                 else
 | |
|                     opcode = VKD3DSIH_DCL_INPUT_SIV;
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         if (semantic == VKD3D_SHADER_SV_NONE || version->type == VKD3D_SHADER_TYPE_PIXEL
 | |
|                 || (version->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func))
 | |
|             opcode = VKD3DSIH_DCL_OUTPUT;
 | |
|         else if ((semantic == VKD3D_SHADER_SV_PRIMITIVE_ID || semantic == VKD3D_SHADER_SV_IS_FRONT_FACE)
 | |
|                 && version->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|             opcode = VKD3DSIH_DCL_OUTPUT_SGV;
 | |
|         else
 | |
|             opcode = VKD3DSIH_DCL_OUTPUT_SIV;
 | |
|     }
 | |
| 
 | |
|     if (sm4_register_from_semantic_name(version, var->semantic.name, output, &type, &has_idx))
 | |
|     {
 | |
|         if (has_idx)
 | |
|             idx = var->semantic.index;
 | |
|         write_mask = (1u << var->data_type->e.numeric.dimx) - 1;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         type = sm4_get_semantic_register_type(version->type, ctx->is_patch_constant_func, var);
 | |
|         has_idx = true;
 | |
|         idx = var->regs[HLSL_REGSET_NUMERIC].id;
 | |
|         write_mask = var->regs[HLSL_REGSET_NUMERIC].writemask;
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, opcode, 0, 0)))
 | |
|         return;
 | |
| 
 | |
|     if (opcode == VKD3DSIH_DCL_OUTPUT)
 | |
|     {
 | |
|         VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || semantic == VKD3D_SHADER_SV_TARGET
 | |
|                 || version->type == VKD3D_SHADER_TYPE_HULL || type != VKD3DSPR_OUTPUT);
 | |
|         dst_param = &ins->declaration.dst;
 | |
|     }
 | |
|     else if (opcode == VKD3DSIH_DCL_INPUT || opcode == VKD3DSIH_DCL_INPUT_PS)
 | |
|     {
 | |
|         VKD3D_ASSERT(semantic == VKD3D_SHADER_SV_NONE || is_primitive || version->type == VKD3D_SHADER_TYPE_GEOMETRY);
 | |
|         dst_param = &ins->declaration.dst;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         VKD3D_ASSERT(semantic != VKD3D_SHADER_SV_NONE);
 | |
|         ins->declaration.register_semantic.sysval_semantic = vkd3d_siv_from_sysval_indexed(semantic,
 | |
|                 var->semantic.index);
 | |
|         dst_param = &ins->declaration.register_semantic.reg;
 | |
|     }
 | |
| 
 | |
|     if (is_primitive)
 | |
|     {
 | |
|         VKD3D_ASSERT(has_idx);
 | |
|         vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 2);
 | |
|         dst_param->reg.idx[0].offset = var->data_type->e.array.elements_count;
 | |
|         dst_param->reg.idx[1].offset = idx;
 | |
|     }
 | |
|     else if (has_idx)
 | |
|     {
 | |
|         vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 1);
 | |
|         dst_param->reg.idx[0].offset = idx;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         vsir_register_init(&dst_param->reg, type, VKD3D_DATA_FLOAT, 0);
 | |
|     }
 | |
| 
 | |
|     if (shader_sm4_is_scalar_register(&dst_param->reg))
 | |
|         dst_param->reg.dimension = VSIR_DIMENSION_SCALAR;
 | |
|     else
 | |
|         dst_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
| 
 | |
|     dst_param->write_mask = write_mask;
 | |
| 
 | |
|     if (var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|         ins->flags = sm4_get_interpolation_mode(var->data_type, var->storage_modifiers);
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_instr_dcl_temps(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         uint32_t temp_count, struct hlsl_block *block, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_TEMPS, 0, 0)))
 | |
|         return;
 | |
| 
 | |
|     ins->declaration.count = temp_count;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_instr_dcl_indexable_temp(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_block *block, uint32_t idx,
 | |
|         uint32_t size, uint32_t comp_count, const struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, loc, VKD3DSIH_DCL_INDEXABLE_TEMP, 0, 0)))
 | |
|         return;
 | |
| 
 | |
|     ins->declaration.indexable_temp.register_idx = idx;
 | |
|     ins->declaration.indexable_temp.register_size = size;
 | |
|     ins->declaration.indexable_temp.alignment = 0;
 | |
|     ins->declaration.indexable_temp.data_type = VKD3D_DATA_FLOAT;
 | |
|     ins->declaration.indexable_temp.component_count = comp_count;
 | |
|     ins->declaration.indexable_temp.has_function_scope = false;
 | |
| }
 | |
| 
 | |
| static bool type_is_float(const struct hlsl_type *type)
 | |
| {
 | |
|     return type->e.numeric.type == HLSL_TYPE_FLOAT || type->e.numeric.type == HLSL_TYPE_HALF;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_cast_from_bool(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         const struct hlsl_ir_expr *expr, uint32_t bits)
 | |
| {
 | |
|     struct hlsl_ir_node *operand = expr->operands[0].node;
 | |
|     const struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct hlsl_constant_value value = {0};
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_AND, 1, 2)))
 | |
|         return;
 | |
| 
 | |
|     dst_param = &ins->dst[0];
 | |
|     vsir_dst_from_hlsl_node(dst_param, ctx, instr);
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, operand, dst_param->write_mask);
 | |
| 
 | |
|     value.u[0].u = bits;
 | |
|     vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value, VKD3D_DATA_UINT, 1, 0);
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_expr_cast(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_expr *expr)
 | |
| {
 | |
|     const struct hlsl_ir_node *arg1 = expr->operands[0].node;
 | |
|     const struct hlsl_type *dst_type = expr->node.data_type;
 | |
|     const struct hlsl_type *src_type = arg1->data_type;
 | |
| 
 | |
|     static const union
 | |
|     {
 | |
|         uint32_t u;
 | |
|         float f;
 | |
|     } one = { .f = 1.0 };
 | |
| 
 | |
|     /* Narrowing casts were already lowered. */
 | |
|     VKD3D_ASSERT(src_type->e.numeric.dimx == dst_type->e.numeric.dimx);
 | |
| 
 | |
|     switch (dst_type->e.numeric.type)
 | |
|     {
 | |
|         case HLSL_TYPE_HALF:
 | |
|         case HLSL_TYPE_FLOAT:
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_HALF:
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ITOF, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UTOF, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                     sm4_generate_vsir_cast_from_bool(ctx, program, expr, one.u);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_DOUBLE:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to float.");
 | |
|                     return false;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_INT:
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_HALF:
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOI, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                     sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_DOUBLE:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to int.");
 | |
|                     return false;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|         case HLSL_TYPE_UINT:
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_HALF:
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FTOU, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                     sm4_generate_vsir_cast_from_bool(ctx, program, expr, 1u);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_DOUBLE:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 cast from double to uint.");
 | |
|                     return false;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_DOUBLE:
 | |
|             hlsl_fixme(ctx, &expr->node.loc, "SM4 cast to double.");
 | |
|             return false;
 | |
| 
 | |
|         case HLSL_TYPE_BOOL:
 | |
|             /* Casts to bool should have already been lowered. */
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_expr_with_two_destinations(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         enum vkd3d_shader_opcode opcode, const struct hlsl_ir_expr *expr, unsigned int dst_idx)
 | |
| {
 | |
|     const struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     unsigned int i, src_count;
 | |
| 
 | |
|     VKD3D_ASSERT(instr->reg.allocated);
 | |
| 
 | |
|     for (i = 0; i < HLSL_MAX_OPERANDS; ++i)
 | |
|     {
 | |
|         if (expr->operands[i].node)
 | |
|             src_count = i + 1;
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 2, src_count)))
 | |
|         return;
 | |
| 
 | |
|     dst_param = &ins->dst[dst_idx];
 | |
|     vsir_dst_from_hlsl_node(dst_param, ctx, instr);
 | |
| 
 | |
|     vsir_dst_param_init_null(&ins->dst[1 - dst_idx]);
 | |
| 
 | |
|     for (i = 0; i < src_count; ++i)
 | |
|         vsir_src_from_hlsl_node(&ins->src[i], ctx, expr->operands[i].node, dst_param->write_mask);
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_rcp_using_div(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_expr *expr)
 | |
| {
 | |
|     struct hlsl_ir_node *operand = expr->operands[0].node;
 | |
|     const struct hlsl_ir_node *instr = &expr->node;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct hlsl_constant_value value = {0};
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     VKD3D_ASSERT(type_is_float(expr->node.data_type));
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DIV, 1, 2)))
 | |
|         return;
 | |
| 
 | |
|     dst_param = &ins->dst[0];
 | |
|     vsir_dst_from_hlsl_node(dst_param, ctx, instr);
 | |
| 
 | |
|     value.u[0].f = 1.0f;
 | |
|     value.u[1].f = 1.0f;
 | |
|     value.u[2].f = 1.0f;
 | |
|     value.u[3].f = 1.0f;
 | |
|     vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value,
 | |
|             VKD3D_DATA_FLOAT, instr->data_type->e.numeric.dimx, dst_param->write_mask);
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[1], ctx, operand, dst_param->write_mask);
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_expr(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_expr *expr, const char *dst_type_name)
 | |
| {
 | |
|     const struct hlsl_type *dst_type = expr->node.data_type;
 | |
|     const struct hlsl_type *src_type = NULL;
 | |
| 
 | |
|     VKD3D_ASSERT(expr->node.reg.allocated);
 | |
|     if (expr->operands[0].node)
 | |
|         src_type = expr->operands[0].node->data_type;
 | |
| 
 | |
|     switch (expr->op)
 | |
|     {
 | |
|         case HLSL_OP0_RASTERIZER_SAMPLE_COUNT:
 | |
|             sm4_generate_vsir_rasterizer_sample_count(ctx, program, expr);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_ABS:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_ABS, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_BIT_NOT:
 | |
|             VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_CAST:
 | |
|             return sm4_generate_vsir_instr_expr_cast(ctx, program, expr);
 | |
| 
 | |
|         case HLSL_OP1_CEIL:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_PI, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_COS:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 1);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_DSX:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_DSX_COARSE:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_COARSE, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_DSX_FINE:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSX_FINE, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_DSY:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_DSY_COARSE:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_COARSE, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_DSY_FINE:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DSY_FINE, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_EXP2:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EXP, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_F16TOF32:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F16TOF32, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_F32TOF16:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_UINT);
 | |
|             VKD3D_ASSERT(hlsl_version_ge(ctx, 5, 0));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_F32TOF16, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_FLOOR:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NI, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_FRACT:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_FRC, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_LOG2:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LOG, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_LOGIC_NOT:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NOT, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_NEG:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, VKD3DSPSM_NEG, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INEG, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s negation expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP1_RCP:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     /* SM5 comes with a RCP opcode */
 | |
|                     if (hlsl_version_ge(ctx, 5, 0))
 | |
|                         generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RCP, 0, 0, true);
 | |
|                     else
 | |
|                         sm4_generate_vsir_rcp_using_div(ctx, program, expr);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s rcp expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP1_REINTERPRET:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_ROUND:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_NE, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_RSQ:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_RSQ, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_SAT:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOV, 0, VKD3DSPDM_SATURATE, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_SIN:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_SINCOS, expr, 0);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_SQRT:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_SQRT, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP1_TRUNC:
 | |
|             VKD3D_ASSERT(type_is_float(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ROUND_Z, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP2_ADD:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ADD, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IADD, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s addition expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_BIT_AND:
 | |
|             VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP2_BIT_OR:
 | |
|             VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP2_BIT_XOR:
 | |
|             VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_XOR, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP2_DIV:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DIV, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 0);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s division expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_DOT:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     switch (expr->operands[0].node->data_type->e.numeric.dimx)
 | |
|                     {
 | |
|                         case 4:
 | |
|                             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP4, 0, 0, false);
 | |
|                             return true;
 | |
| 
 | |
|                         case 3:
 | |
|                             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP3, 0, 0, false);
 | |
|                             return true;
 | |
| 
 | |
|                         case 2:
 | |
|                             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_DP2, 0, 0, false);
 | |
|                             return true;
 | |
| 
 | |
|                         case 1:
 | |
|                         default:
 | |
|                             vkd3d_unreachable();
 | |
|                     }
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s dot expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_EQUAL:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
| 
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_EQO, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IEQ, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 equality between \"%s\" operands.",
 | |
|                             debug_hlsl_type(ctx, src_type));
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_GEQUAL:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
| 
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_GEO, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IGE, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UGE, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 greater-than-or-equal between \"%s\" operands.",
 | |
|                             debug_hlsl_type(ctx, src_type));
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_LESS:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
| 
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_LTO, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ILT, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ULT, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 less-than between \"%s\" operands.",
 | |
|                             debug_hlsl_type(ctx, src_type));
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_LOGIC_AND:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_AND, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP2_LOGIC_OR:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_OR, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP2_LSHIFT:
 | |
|             VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_ISHL, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP3_MAD:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAD, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAD, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s MAD expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_MAX:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MAX, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMAX, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMAX, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s maximum expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_MIN:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MIN, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_IMIN, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_UMIN, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s minimum expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_MOD:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_UDIV, expr, 1);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s modulus expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_MUL:
 | |
|             switch (dst_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MUL, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     /* Using IMUL instead of UMUL because we're taking the low
 | |
|                      * bits, and the native compiler generates IMUL. */
 | |
|                     sm4_generate_vsir_expr_with_two_destinations(ctx, program, VKD3DSIH_IMUL, expr, 1);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 %s multiplication expression.", dst_type_name);
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_NEQUAL:
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type == HLSL_TYPE_BOOL);
 | |
| 
 | |
|             switch (src_type->e.numeric.type)
 | |
|             {
 | |
|                 case HLSL_TYPE_FLOAT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_NEU, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 case HLSL_TYPE_BOOL:
 | |
|                 case HLSL_TYPE_INT:
 | |
|                 case HLSL_TYPE_MIN16UINT: /* FIXME: Needs minimum-precision annotations. */
 | |
|                 case HLSL_TYPE_UINT:
 | |
|                     generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_INE, 0, 0, true);
 | |
|                     return true;
 | |
| 
 | |
|                 default:
 | |
|                     hlsl_fixme(ctx, &expr->node.loc, "SM4 inequality between \"%s\" operands.",
 | |
|                             debug_hlsl_type(ctx, src_type));
 | |
|                     return false;
 | |
|             }
 | |
| 
 | |
|         case HLSL_OP2_RSHIFT:
 | |
|             VKD3D_ASSERT(hlsl_type_is_integer(dst_type));
 | |
|             VKD3D_ASSERT(dst_type->e.numeric.type != HLSL_TYPE_BOOL);
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr,
 | |
|                     dst_type->e.numeric.type == HLSL_TYPE_INT ? VKD3DSIH_ISHR : VKD3DSIH_USHR, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_OP3_TERNARY:
 | |
|             generate_vsir_instr_expr_single_instr_op(ctx, program, expr, VKD3DSIH_MOVC, 0, 0, true);
 | |
|             return true;
 | |
| 
 | |
|         default:
 | |
|             hlsl_fixme(ctx, &expr->node.loc, "SM4 %s expression.", debug_hlsl_expr_op(expr->op));
 | |
|             return false;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_store(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_store *store)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &store->node;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
 | |
|         return false;
 | |
| 
 | |
|     dst_param = &ins->dst[0];
 | |
|     if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
 | |
|             dst_param, &store->lhs, &instr->loc, store->writemask))
 | |
|         return false;
 | |
| 
 | |
|     src_param = &ins->src[0];
 | |
|     vsir_src_from_hlsl_node(src_param, ctx, store->rhs.node, dst_param->write_mask);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* Does this variable's data come directly from the API user, rather than
 | |
|  * being temporary or from a previous shader stage? I.e. is it a uniform or
 | |
|  * VS input? */
 | |
| static bool var_is_user_input(const struct vkd3d_shader_version *version, const struct hlsl_ir_var *var)
 | |
| {
 | |
|     if (var->is_uniform)
 | |
|         return true;
 | |
| 
 | |
|     return var->is_input_semantic && version->type == VKD3D_SHADER_TYPE_VERTEX;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_load(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_load *load)
 | |
| {
 | |
|     const struct vkd3d_shader_version *version = &program->shader_version;
 | |
|     const struct hlsl_type *type = load->node.data_type;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct hlsl_ir_node *instr = &load->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     struct hlsl_constant_value value;
 | |
| 
 | |
|     VKD3D_ASSERT(hlsl_is_numeric_type(type));
 | |
|     if (type->e.numeric.type == HLSL_TYPE_BOOL && var_is_user_input(version, load->src.var))
 | |
|     {
 | |
|         /* Uniform bools can be specified as anything, but internal bools
 | |
|          * always have 0 for false and ~0 for true. Normalise that here. */
 | |
| 
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOVC, 1, 3)))
 | |
|             return false;
 | |
| 
 | |
|         dst_param = &ins->dst[0];
 | |
|         vsir_dst_from_hlsl_node(dst_param, ctx, instr);
 | |
| 
 | |
|         if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
 | |
|                 &ins->src[0], &load->src, dst_param->write_mask, &instr->loc))
 | |
|             return false;
 | |
| 
 | |
|         memset(&value, 0xff, sizeof(value));
 | |
|         vsir_src_from_hlsl_constant_value(&ins->src[1], ctx, &value,
 | |
|                 VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask);
 | |
|         memset(&value, 0x00, sizeof(value));
 | |
|         vsir_src_from_hlsl_constant_value(&ins->src[2], ctx, &value,
 | |
|                 VKD3D_DATA_UINT, type->e.numeric.dimx, dst_param->write_mask);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_MOV, 1, 1)))
 | |
|             return false;
 | |
| 
 | |
|         dst_param = &ins->dst[0];
 | |
|         vsir_dst_from_hlsl_node(dst_param, ctx, instr);
 | |
| 
 | |
|         if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
 | |
|                 &ins->src[0], &load->src, dst_param->write_mask, &instr->loc))
 | |
|             return false;
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_resource_store(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_resource_store *store)
 | |
| {
 | |
|     struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &store->resource);
 | |
|     struct hlsl_ir_node *coords = store->coords.node, *value = store->value.node;
 | |
|     struct hlsl_ir_node *instr = &store->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     unsigned int writemask;
 | |
| 
 | |
|     if (store->store_type != HLSL_RESOURCE_STORE)
 | |
|     {
 | |
|         enum vkd3d_shader_opcode opcode;
 | |
| 
 | |
|         VKD3D_ASSERT(!store->value.node && !store->coords.node);
 | |
|         VKD3D_ASSERT(store->resource.var->regs[HLSL_REGSET_STREAM_OUTPUTS].allocated);
 | |
| 
 | |
|         if (hlsl_version_lt(ctx, 5, 0))
 | |
|         {
 | |
|             opcode = store->store_type == HLSL_RESOURCE_STREAM_APPEND ? VKD3DSIH_EMIT : VKD3DSIH_CUT;
 | |
|             ins = generate_vsir_add_program_instruction(ctx, program, &store->node.loc, opcode, 0, 0);
 | |
|             return !!ins;
 | |
|         }
 | |
| 
 | |
|         opcode = store->store_type == HLSL_RESOURCE_STREAM_APPEND ? VKD3DSIH_EMIT_STREAM : VKD3DSIH_CUT_STREAM;
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &store->node.loc, opcode, 0, 1)))
 | |
|             return false;
 | |
| 
 | |
|         if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[0],
 | |
|                 &store->resource, VKD3DSP_WRITEMASK_ALL, &instr->loc))
 | |
|             return false;
 | |
| 
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     if (!store->resource.var->is_uniform)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &store->node.loc, "Store to non-uniform resource variable.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &store->node.loc, "Structured buffers store is not implemented.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     if (resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
 | |
|     {
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_RAW, 1, 2)))
 | |
|             return false;
 | |
| 
 | |
|         writemask = vkd3d_write_mask_from_component_count(value->data_type->e.numeric.dimx);
 | |
|         if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
 | |
|                 &ins->dst[0], &store->resource, &instr->loc, writemask))
 | |
|             return false;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_STORE_UAV_TYPED, 1, 2)))
 | |
|             return false;
 | |
| 
 | |
|         if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program,
 | |
|                 &ins->dst[0], &store->resource, &instr->loc, VKD3DSP_WRITEMASK_ALL))
 | |
|             return false;
 | |
|     }
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
 | |
|     vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_validate_texel_offset_aoffimmi(const struct hlsl_ir_node *texel_offset)
 | |
| {
 | |
|     struct hlsl_ir_constant *offset;
 | |
| 
 | |
|     VKD3D_ASSERT(texel_offset);
 | |
|     if (texel_offset->type != HLSL_IR_CONSTANT)
 | |
|         return false;
 | |
|     offset = hlsl_ir_constant(texel_offset);
 | |
| 
 | |
|     if (offset->value.u[0].i < -8 || offset->value.u[0].i > 7)
 | |
|         return false;
 | |
|     if (offset->node.data_type->e.numeric.dimx > 1 && (offset->value.u[1].i < -8 || offset->value.u[1].i > 7))
 | |
|         return false;
 | |
|     if (offset->node.data_type->e.numeric.dimx > 2 && (offset->value.u[2].i < -8 || offset->value.u[2].i > 7))
 | |
|         return false;
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_encode_texel_offset_as_aoffimmi(
 | |
|         struct vkd3d_shader_instruction *ins, const struct hlsl_ir_node *texel_offset)
 | |
| {
 | |
|     struct hlsl_ir_constant *offset;
 | |
| 
 | |
|     if (!texel_offset)
 | |
|         return;
 | |
|     offset = hlsl_ir_constant(texel_offset);
 | |
| 
 | |
|     ins->texel_offset.u = offset->value.u[0].i;
 | |
|     ins->texel_offset.v = 0;
 | |
|     ins->texel_offset.w = 0;
 | |
|     if (offset->node.data_type->e.numeric.dimx > 1)
 | |
|         ins->texel_offset.v = offset->value.u[1].i;
 | |
|     if (offset->node.data_type->e.numeric.dimx > 2)
 | |
|         ins->texel_offset.w = offset->value.u[2].i;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_ld(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_resource_load *load)
 | |
| {
 | |
|     const struct hlsl_type *resource_type = hlsl_deref_get_type(ctx, &load->resource);
 | |
|     bool uav = (hlsl_deref_get_regset(ctx, &load->resource) == HLSL_REGSET_UAVS);
 | |
|     const struct vkd3d_shader_version *version = &program->shader_version;
 | |
|     bool raw = resource_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER;
 | |
|     const struct hlsl_ir_node *sample_index = load->sample_index.node;
 | |
|     const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
 | |
|     const struct hlsl_ir_node *coords = load->coords.node;
 | |
|     unsigned int coords_writemask = VKD3DSP_WRITEMASK_ALL;
 | |
|     const struct hlsl_deref *resource = &load->resource;
 | |
|     const struct hlsl_ir_node *instr = &load->node;
 | |
|     enum hlsl_sampler_dim dim = load->sampling_dim;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     enum vkd3d_shader_opcode opcode;
 | |
|     bool multisampled;
 | |
| 
 | |
|     VKD3D_ASSERT(load->load_type == HLSL_RESOURCE_LOAD);
 | |
| 
 | |
|     multisampled = resource_type->class == HLSL_CLASS_TEXTURE
 | |
|             && (resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
 | |
|             || resource_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY);
 | |
| 
 | |
|     if (uav)
 | |
|         opcode = VKD3DSIH_LD_UAV_TYPED;
 | |
|     else if (raw)
 | |
|         opcode = VKD3DSIH_LD_RAW;
 | |
|     else
 | |
|         opcode = multisampled ? VKD3DSIH_LD2DMS : VKD3DSIH_LD;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, 2 + multisampled)))
 | |
|         return false;
 | |
| 
 | |
|     if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset))
 | |
|     {
 | |
|         hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
 | |
|                 "Offset must resolve to integer literal in the range -8 to 7.");
 | |
|         return false;
 | |
|     }
 | |
|     sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset);
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     if (!uav)
 | |
|     {
 | |
|         /* Mipmap level is in the last component in the IR, but needs to be in
 | |
|          * the W component in the instruction. */
 | |
|         unsigned int dim_count = hlsl_sampler_dim_count(dim);
 | |
| 
 | |
|         if (dim_count == 1)
 | |
|             coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_3;
 | |
|         if (dim_count == 2)
 | |
|             coords_writemask = VKD3DSP_WRITEMASK_0 | VKD3DSP_WRITEMASK_1 | VKD3DSP_WRITEMASK_3;
 | |
|     }
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, coords_writemask);
 | |
| 
 | |
|     if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
 | |
|             &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc))
 | |
|         return false;
 | |
| 
 | |
|     if (multisampled)
 | |
|     {
 | |
|         if (sample_index->type == HLSL_IR_CONSTANT)
 | |
|             vsir_src_from_hlsl_constant_value(&ins->src[2], ctx,
 | |
|                     &hlsl_ir_constant(sample_index)->value, VKD3D_DATA_INT, 1, 0);
 | |
|         else if (version->major == 4 && version->minor == 0)
 | |
|             hlsl_error(ctx, &sample_index->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE, "Expected literal sample index.");
 | |
|         else
 | |
|             vsir_src_from_hlsl_node(&ins->src[2], ctx, sample_index, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_sample(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_resource_load *load)
 | |
| {
 | |
|     const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
 | |
|     const struct hlsl_ir_node *coords = load->coords.node;
 | |
|     const struct hlsl_deref *resource = &load->resource;
 | |
|     const struct hlsl_deref *sampler = &load->sampler;
 | |
|     const struct hlsl_ir_node *instr = &load->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     enum vkd3d_shader_opcode opcode;
 | |
|     unsigned int src_count;
 | |
| 
 | |
|     switch (load->load_type)
 | |
|     {
 | |
|         case HLSL_RESOURCE_SAMPLE:
 | |
|             opcode = VKD3DSIH_SAMPLE;
 | |
|             src_count = 3;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_CMP:
 | |
|             opcode = VKD3DSIH_SAMPLE_C;
 | |
|             src_count = 4;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_CMP_LZ:
 | |
|             opcode = VKD3DSIH_SAMPLE_C_LZ;
 | |
|             src_count = 4;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_LOD:
 | |
|             opcode = VKD3DSIH_SAMPLE_LOD;
 | |
|             src_count = 4;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
 | |
|             opcode = VKD3DSIH_SAMPLE_B;
 | |
|             src_count = 4;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_GRAD:
 | |
|             opcode = VKD3DSIH_SAMPLE_GRAD;
 | |
|             src_count = 5;
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
 | |
|         return false;
 | |
| 
 | |
|     if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset))
 | |
|     {
 | |
|         hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
 | |
|                 "Offset must resolve to integer literal in the range -8 to 7.");
 | |
|         return false;
 | |
|     }
 | |
|     sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset);
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[1],
 | |
|             resource, ins->dst[0].write_mask, &instr->loc))
 | |
|         return false;
 | |
| 
 | |
|     if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program, &ins->src[2],
 | |
|             sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc))
 | |
|         return false;
 | |
| 
 | |
|     if (opcode == VKD3DSIH_SAMPLE_LOD || opcode == VKD3DSIH_SAMPLE_B)
 | |
|     {
 | |
|         vsir_src_from_hlsl_node(&ins->src[3], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
|     else if (opcode == VKD3DSIH_SAMPLE_C || opcode == VKD3DSIH_SAMPLE_C_LZ)
 | |
|     {
 | |
|         vsir_src_from_hlsl_node(&ins->src[3], ctx, load->cmp.node, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
|     else if (opcode == VKD3DSIH_SAMPLE_GRAD)
 | |
|     {
 | |
|         vsir_src_from_hlsl_node(&ins->src[3], ctx, load->ddx.node, VKD3DSP_WRITEMASK_ALL);
 | |
|         vsir_src_from_hlsl_node(&ins->src[4], ctx, load->ddy.node, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_gather(struct hlsl_ctx *ctx, struct vsir_program *program,
 | |
|         const struct hlsl_ir_resource_load *load, uint32_t swizzle, bool compare)
 | |
| {
 | |
|     const struct vkd3d_shader_version *version = &program->shader_version;
 | |
|     const struct hlsl_ir_node *texel_offset = load->texel_offset.node;
 | |
|     const struct hlsl_ir_node *coords = load->coords.node;
 | |
|     const struct hlsl_deref *resource = &load->resource;
 | |
|     enum vkd3d_shader_opcode opcode = VKD3DSIH_GATHER4;
 | |
|     const struct hlsl_deref *sampler = &load->sampler;
 | |
|     const struct hlsl_ir_node *instr = &load->node;
 | |
|     unsigned int src_count = 3, current_arg = 0;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (texel_offset && !sm4_generate_vsir_validate_texel_offset_aoffimmi(texel_offset))
 | |
|     {
 | |
|         if (!vkd3d_shader_ver_ge(version, 5, 0))
 | |
|         {
 | |
|             hlsl_error(ctx, &texel_offset->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TEXEL_OFFSET,
 | |
|                 "Offset must resolve to integer literal in the range -8 to 7 for profiles < 5.");
 | |
|             return false;
 | |
|         }
 | |
|         opcode = VKD3DSIH_GATHER4_PO;
 | |
|         ++src_count;
 | |
|     }
 | |
| 
 | |
|     if (compare)
 | |
|     {
 | |
|         opcode = opcode == VKD3DSIH_GATHER4 ? VKD3DSIH_GATHER4_C : VKD3DSIH_GATHER4_PO_C;
 | |
|         ++src_count;
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode, 1, src_count)))
 | |
|         return false;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
|     vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, coords, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     if (opcode == VKD3DSIH_GATHER4_PO || opcode == VKD3DSIH_GATHER4_PO_C)
 | |
|         vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, texel_offset, VKD3DSP_WRITEMASK_ALL);
 | |
|     else
 | |
|         sm4_generate_vsir_encode_texel_offset_as_aoffimmi(ins, texel_offset);
 | |
| 
 | |
|     if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
 | |
|             &ins->src[current_arg++], resource, ins->dst[0].write_mask, &instr->loc))
 | |
|         return false;
 | |
| 
 | |
|     if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
 | |
|             &ins->src[current_arg], sampler, VKD3DSP_WRITEMASK_ALL, &instr->loc))
 | |
|         return false;
 | |
|     ins->src[current_arg].reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     ins->src[current_arg].swizzle = swizzle;
 | |
|     current_arg++;
 | |
| 
 | |
|     if (compare)
 | |
|         vsir_src_from_hlsl_node(&ins->src[current_arg++], ctx, load->cmp.node, VKD3DSP_WRITEMASK_0);
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_sample_info(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_resource_load *load)
 | |
| {
 | |
|     const struct hlsl_deref *resource = &load->resource;
 | |
|     const struct hlsl_ir_node *instr = &load->node;
 | |
|     struct hlsl_type *type = instr->data_type;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SAMPLE_INFO, 1, 1)))
 | |
|         return false;
 | |
| 
 | |
|     if (type->e.numeric.type == HLSL_TYPE_UINT)
 | |
|         ins->flags = VKD3DSI_SAMPLE_INFO_UINT;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
 | |
|             &ins->src[0], resource, ins->dst[0].write_mask, &instr->loc))
 | |
|         return false;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_resinfo(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_resource_load *load)
 | |
| {
 | |
|     const struct hlsl_deref *resource = &load->resource;
 | |
|     const struct hlsl_ir_node *instr = &load->node;
 | |
|     struct hlsl_type *type = instr->data_type;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_BUFFER
 | |
|             || resource->data_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &load->node.loc, "resinfo for buffers.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     VKD3D_ASSERT(type->e.numeric.type == HLSL_TYPE_UINT || type->e.numeric.type == HLSL_TYPE_FLOAT);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_RESINFO, 1, 2)))
 | |
|         return false;
 | |
| 
 | |
|     if (type->e.numeric.type == HLSL_TYPE_UINT)
 | |
|         ins->flags = VKD3DSI_RESINFO_UINT;
 | |
| 
 | |
|     vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, load->lod.node, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     if (!sm4_generate_vsir_init_src_param_from_deref(ctx, program,
 | |
|             &ins->src[1], resource, ins->dst[0].write_mask, &instr->loc))
 | |
|         return false;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static uint32_t get_gather_swizzle(enum hlsl_resource_load_type type)
 | |
| {
 | |
|     switch (type)
 | |
|     {
 | |
|         case HLSL_RESOURCE_GATHER_RED:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_RED:
 | |
|             return VKD3D_SHADER_SWIZZLE(X, X, X, X);
 | |
| 
 | |
|         case HLSL_RESOURCE_GATHER_GREEN:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_GREEN:
 | |
|             return VKD3D_SHADER_SWIZZLE(Y, Y, Y, Y);
 | |
| 
 | |
|         case HLSL_RESOURCE_GATHER_BLUE:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_BLUE:
 | |
|             return VKD3D_SHADER_SWIZZLE(Z, Z, Z, Z);
 | |
| 
 | |
|         case HLSL_RESOURCE_GATHER_ALPHA:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_ALPHA:
 | |
|             return VKD3D_SHADER_SWIZZLE(W, W, W, W);
 | |
|         default:
 | |
|             return 0;
 | |
|     }
 | |
| 
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_resource_load(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_resource_load *load)
 | |
| {
 | |
|     if (load->sampler.var && !load->sampler.var->is_uniform)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &load->node.loc, "Sample using non-uniform sampler variable.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     if (!load->resource.var->is_uniform)
 | |
|     {
 | |
|         hlsl_fixme(ctx, &load->node.loc, "Load from non-uniform resource variable.");
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     switch (load->load_type)
 | |
|     {
 | |
|         case HLSL_RESOURCE_LOAD:
 | |
|             return sm4_generate_vsir_instr_ld(ctx, program, load);
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE:
 | |
|         case HLSL_RESOURCE_SAMPLE_CMP:
 | |
|         case HLSL_RESOURCE_SAMPLE_CMP_LZ:
 | |
|         case HLSL_RESOURCE_SAMPLE_LOD:
 | |
|         case HLSL_RESOURCE_SAMPLE_LOD_BIAS:
 | |
|         case HLSL_RESOURCE_SAMPLE_GRAD:
 | |
|             /* Combined sample expressions were lowered. */
 | |
|             VKD3D_ASSERT(load->sampler.var);
 | |
|             return sm4_generate_vsir_instr_sample(ctx, program, load);
 | |
| 
 | |
|         case HLSL_RESOURCE_GATHER_RED:
 | |
|         case HLSL_RESOURCE_GATHER_GREEN:
 | |
|         case HLSL_RESOURCE_GATHER_BLUE:
 | |
|         case HLSL_RESOURCE_GATHER_ALPHA:
 | |
|             return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), false);
 | |
| 
 | |
|         case HLSL_RESOURCE_GATHER_CMP_RED:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_GREEN:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_BLUE:
 | |
|         case HLSL_RESOURCE_GATHER_CMP_ALPHA:
 | |
|             return sm4_generate_vsir_instr_gather(ctx, program, load, get_gather_swizzle(load->load_type), true);
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_INFO:
 | |
|             return sm4_generate_vsir_instr_sample_info(ctx, program, load);
 | |
| 
 | |
|         case HLSL_RESOURCE_RESINFO:
 | |
|             return sm4_generate_vsir_instr_resinfo(ctx, program, load);
 | |
| 
 | |
|         case HLSL_RESOURCE_SAMPLE_PROJ:
 | |
|             vkd3d_unreachable();
 | |
| 
 | |
|         default:
 | |
|             return false;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_interlocked(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_interlocked *interlocked)
 | |
| {
 | |
| 
 | |
|     static const enum vkd3d_shader_opcode opcodes[] =
 | |
|     {
 | |
|         [HLSL_INTERLOCKED_ADD] = VKD3DSIH_ATOMIC_IADD,
 | |
|         [HLSL_INTERLOCKED_AND] = VKD3DSIH_ATOMIC_AND,
 | |
|         [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_ATOMIC_CMP_STORE,
 | |
|         [HLSL_INTERLOCKED_MAX] = VKD3DSIH_ATOMIC_UMAX,
 | |
|         [HLSL_INTERLOCKED_MIN] = VKD3DSIH_ATOMIC_UMIN,
 | |
|         [HLSL_INTERLOCKED_OR] = VKD3DSIH_ATOMIC_OR,
 | |
|         [HLSL_INTERLOCKED_XOR] = VKD3DSIH_ATOMIC_XOR,
 | |
|     };
 | |
| 
 | |
|     static const enum vkd3d_shader_opcode imm_opcodes[] =
 | |
|     {
 | |
|         [HLSL_INTERLOCKED_ADD] = VKD3DSIH_IMM_ATOMIC_IADD,
 | |
|         [HLSL_INTERLOCKED_AND] = VKD3DSIH_IMM_ATOMIC_AND,
 | |
|         [HLSL_INTERLOCKED_CMP_EXCH] = VKD3DSIH_IMM_ATOMIC_CMP_EXCH,
 | |
|         [HLSL_INTERLOCKED_EXCH] = VKD3DSIH_IMM_ATOMIC_EXCH,
 | |
|         [HLSL_INTERLOCKED_MAX] = VKD3DSIH_IMM_ATOMIC_UMAX,
 | |
|         [HLSL_INTERLOCKED_MIN] = VKD3DSIH_IMM_ATOMIC_UMIN,
 | |
|         [HLSL_INTERLOCKED_OR] = VKD3DSIH_IMM_ATOMIC_OR,
 | |
|         [HLSL_INTERLOCKED_XOR] = VKD3DSIH_IMM_ATOMIC_XOR,
 | |
|     };
 | |
| 
 | |
|     struct hlsl_ir_node *cmp_value = interlocked->cmp_value.node, *value = interlocked->value.node;
 | |
|     struct hlsl_ir_node *coords = interlocked->coords.node;
 | |
|     struct hlsl_ir_node *instr = &interlocked->node;
 | |
|     bool is_imm = interlocked->node.reg.allocated;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     enum vkd3d_shader_opcode opcode;
 | |
| 
 | |
|     opcode = is_imm ? imm_opcodes[interlocked->op] : opcodes[interlocked->op];
 | |
| 
 | |
|     if (value->data_type->e.numeric.type == HLSL_TYPE_INT)
 | |
|     {
 | |
|         if (opcode == VKD3DSIH_ATOMIC_UMAX)
 | |
|             opcode = VKD3DSIH_ATOMIC_IMAX;
 | |
|         else if (opcode == VKD3DSIH_ATOMIC_UMIN)
 | |
|             opcode = VKD3DSIH_ATOMIC_IMIN;
 | |
|         else if (opcode == VKD3DSIH_IMM_ATOMIC_UMAX)
 | |
|             opcode = VKD3DSIH_IMM_ATOMIC_IMAX;
 | |
|         else if (opcode == VKD3DSIH_IMM_ATOMIC_UMIN)
 | |
|             opcode = VKD3DSIH_IMM_ATOMIC_IMIN;
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, opcode,
 | |
|             is_imm ? 2 : 1, cmp_value ? 3 : 2)))
 | |
|         return false;
 | |
| 
 | |
|     if (is_imm)
 | |
|         vsir_dst_from_hlsl_node(&ins->dst[0], ctx, instr);
 | |
| 
 | |
|     dst_param = is_imm ? &ins->dst[1] : &ins->dst[0];
 | |
|     if (!sm4_generate_vsir_init_dst_param_from_deref(ctx, program, dst_param, &interlocked->dst, &instr->loc, 0))
 | |
|         return false;
 | |
|     dst_param->reg.dimension = VSIR_DIMENSION_NONE;
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, coords, VKD3DSP_WRITEMASK_ALL);
 | |
|     if (cmp_value)
 | |
|     {
 | |
|         vsir_src_from_hlsl_node(&ins->src[1], ctx, cmp_value, VKD3DSP_WRITEMASK_ALL);
 | |
|         vsir_src_from_hlsl_node(&ins->src[2], ctx, value, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         vsir_src_from_hlsl_node(&ins->src[1], ctx, value, VKD3DSP_WRITEMASK_ALL);
 | |
|     }
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_jump(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_jump *jump)
 | |
| {
 | |
|     const struct hlsl_ir_node *instr = &jump->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     switch (jump->type)
 | |
|     {
 | |
|         case HLSL_IR_JUMP_BREAK:
 | |
|             return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_BREAK, 0, 0);
 | |
| 
 | |
|         case HLSL_IR_JUMP_CONTINUE:
 | |
|             return generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CONTINUE, 0, 0);
 | |
| 
 | |
|         case HLSL_IR_JUMP_DISCARD_NZ:
 | |
|             if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DISCARD, 0, 1)))
 | |
|                 return false;
 | |
|             ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
 | |
| 
 | |
|             vsir_src_from_hlsl_node(&ins->src[0], ctx, jump->condition.node, VKD3DSP_WRITEMASK_ALL);
 | |
|             return true;
 | |
| 
 | |
|         case HLSL_IR_JUMP_RETURN:
 | |
|             vkd3d_unreachable();
 | |
| 
 | |
|         default:
 | |
|             hlsl_fixme(ctx, &jump->node.loc, "Jump type %s.", hlsl_jump_type_to_string(jump->type));
 | |
|             return false;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static bool sm4_generate_vsir_instr_sync(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_sync *sync)
 | |
| {
 | |
|     const struct hlsl_ir_node *instr = &sync->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SYNC, 0, 0)))
 | |
|         return false;
 | |
|     ins->flags = sync->sync_flags;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program);
 | |
| 
 | |
| static void sm4_generate_vsir_instr_if(struct hlsl_ctx *ctx, struct vsir_program *program, struct hlsl_ir_if *iff)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &iff->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     VKD3D_ASSERT(iff->condition.node->data_type->e.numeric.dimx == 1);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_IF, 0, 1)))
 | |
|         return;
 | |
|     ins->flags = VKD3D_SHADER_CONDITIONAL_OP_NZ;
 | |
| 
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, iff->condition.node, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     sm4_generate_vsir_block(ctx, &iff->then_block, program);
 | |
| 
 | |
|     if (!list_empty(&iff->else_block.instrs))
 | |
|     {
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ELSE, 0, 0)))
 | |
|             return;
 | |
|         sm4_generate_vsir_block(ctx, &iff->else_block, program);
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDIF, 0, 0)))
 | |
|         return;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_instr_loop(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_loop *loop)
 | |
| {
 | |
|     struct hlsl_ir_node *instr = &loop->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_LOOP, 0, 0)))
 | |
|         return;
 | |
| 
 | |
|     sm4_generate_vsir_block(ctx, &loop->body, program);
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDLOOP, 0, 0)))
 | |
|         return;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_instr_switch(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, struct hlsl_ir_switch *swi)
 | |
| {
 | |
|     const struct hlsl_ir_node *selector = swi->selector.node;
 | |
|     struct hlsl_ir_node *instr = &swi->node;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     struct hlsl_ir_switch_case *cas;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_SWITCH, 0, 1)))
 | |
|         return;
 | |
|     vsir_src_from_hlsl_node(&ins->src[0], ctx, selector, VKD3DSP_WRITEMASK_ALL);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(cas, &swi->cases, struct hlsl_ir_switch_case, entry)
 | |
|     {
 | |
|         if (cas->is_default)
 | |
|         {
 | |
|             if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_DEFAULT, 0, 0)))
 | |
|                 return;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             struct hlsl_constant_value value = {.u[0].u = cas->value};
 | |
| 
 | |
|             if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_CASE, 0, 1)))
 | |
|                 return;
 | |
|             vsir_src_from_hlsl_constant_value(&ins->src[0], ctx, &value, VKD3D_DATA_UINT, 1, VKD3DSP_WRITEMASK_ALL);
 | |
|         }
 | |
| 
 | |
|         sm4_generate_vsir_block(ctx, &cas->body, program);
 | |
|     }
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &instr->loc, VKD3DSIH_ENDSWITCH, 0, 0)))
 | |
|         return;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_block(struct hlsl_ctx *ctx, struct hlsl_block *block, struct vsir_program *program)
 | |
| {
 | |
|     struct vkd3d_string_buffer *dst_type_string;
 | |
|     struct hlsl_ir_node *instr, *next;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(instr, next, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         if (instr->data_type)
 | |
|         {
 | |
|             if (instr->data_type->class != HLSL_CLASS_SCALAR && instr->data_type->class != HLSL_CLASS_VECTOR)
 | |
|             {
 | |
|                 hlsl_fixme(ctx, &instr->loc, "Class %#x should have been lowered or removed.", instr->data_type->class);
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         switch (instr->type)
 | |
|         {
 | |
|             case HLSL_IR_CALL:
 | |
|                 vkd3d_unreachable();
 | |
| 
 | |
|             case HLSL_IR_CONSTANT:
 | |
|                 /* In SM4 all constants are inlined. */
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_EXPR:
 | |
|                 if (!(dst_type_string = hlsl_type_to_string(ctx, instr->data_type)))
 | |
|                     break;
 | |
|                 sm4_generate_vsir_instr_expr(ctx, program, hlsl_ir_expr(instr), dst_type_string->buffer);
 | |
|                 hlsl_release_string_buffer(ctx, dst_type_string);
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_IF:
 | |
|                 sm4_generate_vsir_instr_if(ctx, program, hlsl_ir_if(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_LOAD:
 | |
|                 sm4_generate_vsir_instr_load(ctx, program, hlsl_ir_load(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_LOOP:
 | |
|                 sm4_generate_vsir_instr_loop(ctx, program, hlsl_ir_loop(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_RESOURCE_LOAD:
 | |
|                 sm4_generate_vsir_instr_resource_load(ctx, program, hlsl_ir_resource_load(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_RESOURCE_STORE:
 | |
|                 sm4_generate_vsir_instr_resource_store(ctx, program, hlsl_ir_resource_store(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_JUMP:
 | |
|                 sm4_generate_vsir_instr_jump(ctx, program, hlsl_ir_jump(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_STORE:
 | |
|                 sm4_generate_vsir_instr_store(ctx, program, hlsl_ir_store(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_SWITCH:
 | |
|                 sm4_generate_vsir_instr_switch(ctx, program, hlsl_ir_switch(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_SWIZZLE:
 | |
|                 generate_vsir_instr_swizzle(ctx, program, hlsl_ir_swizzle(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_INTERLOCKED:
 | |
|                 sm4_generate_vsir_instr_interlocked(ctx, program, hlsl_ir_interlocked(instr));
 | |
|                 break;
 | |
| 
 | |
|             case HLSL_IR_SYNC:
 | |
|                 sm4_generate_vsir_instr_sync(ctx, program, hlsl_ir_sync(instr));
 | |
|                 break;
 | |
| 
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_add_function(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_ir_function_decl *func, uint64_t config_flags, struct vsir_program *program)
 | |
| {
 | |
|     struct hlsl_block block = {0};
 | |
|     struct hlsl_scope *scope;
 | |
|     struct hlsl_ir_var *var;
 | |
|     uint32_t temp_count;
 | |
| 
 | |
|     ctx->is_patch_constant_func = func == ctx->patch_constant_func;
 | |
| 
 | |
|     compute_liveness(ctx, func);
 | |
|     mark_indexable_vars(ctx, func);
 | |
|     temp_count = allocate_temp_registers(ctx, func);
 | |
|     if (ctx->result)
 | |
|         return;
 | |
|     program->temp_count = max(program->temp_count, temp_count);
 | |
| 
 | |
|     hlsl_block_init(&block);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if ((var->is_input_semantic && var->last_read)
 | |
|                 || (var->is_output_semantic && var->first_write))
 | |
|             sm4_generate_vsir_instr_dcl_semantic(ctx, program, var, &block, &var->loc);
 | |
|     }
 | |
| 
 | |
|     if (temp_count)
 | |
|         sm4_generate_vsir_instr_dcl_temps(ctx, program, temp_count, &block, &func->loc);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(scope, &ctx->scopes, struct hlsl_scope, entry)
 | |
|     {
 | |
|         LIST_FOR_EACH_ENTRY(var, &scope->vars, struct hlsl_ir_var, scope_entry)
 | |
|         {
 | |
|             if (var->is_uniform || var->is_input_semantic || var->is_output_semantic)
 | |
|                 continue;
 | |
|             if (!var->regs[HLSL_REGSET_NUMERIC].allocated)
 | |
|                 continue;
 | |
| 
 | |
|             if (var->indexable)
 | |
|             {
 | |
|                 unsigned int id = var->regs[HLSL_REGSET_NUMERIC].id;
 | |
|                 unsigned int size = align(var->data_type->reg_size[HLSL_REGSET_NUMERIC], 4) / 4;
 | |
| 
 | |
|                 sm4_generate_vsir_instr_dcl_indexable_temp(ctx, program, &block, id, size, 4, &var->loc);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     list_move_head(&func->body.instrs, &block.instrs);
 | |
| 
 | |
|     hlsl_block_cleanup(&block);
 | |
| 
 | |
|     sm4_generate_vsir_block(ctx, &func->body, program);
 | |
| 
 | |
|     generate_vsir_add_program_instruction(ctx, program, &func->loc, VKD3DSIH_RET, 0, 0);
 | |
| }
 | |
| 
 | |
| static int sm4_compare_extern_resources(const void *a, const void *b)
 | |
| {
 | |
|     const struct extern_resource *aa = a;
 | |
|     const struct extern_resource *bb = b;
 | |
|     int r;
 | |
| 
 | |
|     if ((r = vkd3d_u32_compare(aa->regset, bb->regset)))
 | |
|         return r;
 | |
| 
 | |
|     if ((r = vkd3d_u32_compare(aa->space, bb->space)))
 | |
|         return r;
 | |
| 
 | |
|     return vkd3d_u32_compare(aa->index, bb->index);
 | |
| }
 | |
| 
 | |
| static const char *string_skip_tag(const char *string)
 | |
| {
 | |
|     if (!strncmp(string, "<resource>", strlen("<resource>")))
 | |
|         return string + strlen("<resource>");
 | |
|     return string;
 | |
| }
 | |
| 
 | |
| static void sm4_free_extern_resources(struct extern_resource *extern_resources, unsigned int count)
 | |
| {
 | |
|     unsigned int i;
 | |
| 
 | |
|     for (i = 0; i < count; ++i)
 | |
|     {
 | |
|         vkd3d_free(extern_resources[i].name);
 | |
|     }
 | |
|     vkd3d_free(extern_resources);
 | |
| }
 | |
| 
 | |
| static struct extern_resource *sm4_get_extern_resources(struct hlsl_ctx *ctx, unsigned int *count)
 | |
| {
 | |
|     bool separate_components = ctx->profile->major_version == 5 && ctx->profile->minor_version == 0;
 | |
|     struct extern_resource *extern_resources = NULL;
 | |
|     const struct hlsl_ir_var *var;
 | |
|     struct hlsl_buffer *buffer;
 | |
|     enum hlsl_regset regset;
 | |
|     size_t capacity = 0;
 | |
|     char *name;
 | |
| 
 | |
|     *count = 0;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         if (separate_components)
 | |
|         {
 | |
|             unsigned int component_count = hlsl_type_component_count(var->data_type);
 | |
|             unsigned int k, regset_offset;
 | |
| 
 | |
|             for (k = 0; k < component_count; ++k)
 | |
|             {
 | |
|                 struct hlsl_type *component_type = hlsl_type_get_component_type(ctx, var->data_type, k);
 | |
|                 struct vkd3d_string_buffer *name_buffer;
 | |
| 
 | |
|                 if (!hlsl_type_is_resource(component_type))
 | |
|                     continue;
 | |
| 
 | |
|                 regset_offset = hlsl_type_get_component_offset(ctx, var->data_type, k, ®set);
 | |
|                 if (regset_offset > var->regs[regset].allocation_size)
 | |
|                     continue;
 | |
| 
 | |
|                 if (!var->objects_usage[regset][regset_offset].used)
 | |
|                     continue;
 | |
| 
 | |
|                 if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
 | |
|                         &capacity, *count + 1, sizeof(*extern_resources))))
 | |
|                 {
 | |
|                     sm4_free_extern_resources(extern_resources, *count);
 | |
|                     *count = 0;
 | |
|                     return NULL;
 | |
|                 }
 | |
| 
 | |
|                 if (!(name_buffer = hlsl_component_to_string(ctx, var, k)))
 | |
|                 {
 | |
|                     sm4_free_extern_resources(extern_resources, *count);
 | |
|                     *count = 0;
 | |
|                     return NULL;
 | |
|                 }
 | |
|                 if (!(name = hlsl_strdup(ctx, string_skip_tag(name_buffer->buffer))))
 | |
|                 {
 | |
|                     sm4_free_extern_resources(extern_resources, *count);
 | |
|                     *count = 0;
 | |
|                     hlsl_release_string_buffer(ctx, name_buffer);
 | |
|                     return NULL;
 | |
|                 }
 | |
|                 hlsl_release_string_buffer(ctx, name_buffer);
 | |
| 
 | |
|                 extern_resources[*count].var = NULL;
 | |
|                 extern_resources[*count].buffer = NULL;
 | |
| 
 | |
|                 extern_resources[*count].name = name;
 | |
|                 extern_resources[*count].is_user_packed = !!var->reg_reservation.reg_type;
 | |
| 
 | |
|                 extern_resources[*count].component_type = component_type;
 | |
| 
 | |
|                 extern_resources[*count].regset = regset;
 | |
|                 extern_resources[*count].id = var->regs[regset].id;
 | |
|                 extern_resources[*count].space = var->regs[regset].space;
 | |
|                 extern_resources[*count].index = var->regs[regset].index + regset_offset;
 | |
|                 extern_resources[*count].bind_count = 1;
 | |
|                 extern_resources[*count].loc = var->loc;
 | |
| 
 | |
|                 ++*count;
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             unsigned int r;
 | |
| 
 | |
|             if (!hlsl_type_is_resource(var->data_type))
 | |
|                 continue;
 | |
| 
 | |
|             for (r = 0; r <= HLSL_REGSET_LAST; ++r)
 | |
|             {
 | |
|                 if (!var->regs[r].allocated)
 | |
|                     continue;
 | |
| 
 | |
|                 if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
 | |
|                         &capacity, *count + 1, sizeof(*extern_resources))))
 | |
|                 {
 | |
|                     sm4_free_extern_resources(extern_resources, *count);
 | |
|                     *count = 0;
 | |
|                     return NULL;
 | |
|                 }
 | |
| 
 | |
|                 if (!(name = hlsl_strdup(ctx, string_skip_tag(var->name))))
 | |
|                 {
 | |
|                     sm4_free_extern_resources(extern_resources, *count);
 | |
|                     *count = 0;
 | |
|                     return NULL;
 | |
|                 }
 | |
| 
 | |
|                 extern_resources[*count].var = var;
 | |
|                 extern_resources[*count].buffer = NULL;
 | |
| 
 | |
|                 extern_resources[*count].name = name;
 | |
|                 /* For some reason 5.1 resources aren't marked as
 | |
|                  * user-packed, but cbuffers still are. */
 | |
|                 extern_resources[*count].is_user_packed = hlsl_version_lt(ctx, 5, 1)
 | |
|                         && !!var->reg_reservation.reg_type;
 | |
| 
 | |
|                 extern_resources[*count].component_type = hlsl_type_get_component_type(ctx, var->data_type, 0);
 | |
| 
 | |
|                 extern_resources[*count].regset = r;
 | |
|                 extern_resources[*count].id = var->regs[r].id;
 | |
|                 extern_resources[*count].space = var->regs[r].space;
 | |
|                 extern_resources[*count].index = var->regs[r].index;
 | |
|                 extern_resources[*count].bind_count = var->bind_count[r];
 | |
|                 extern_resources[*count].loc = var->loc;
 | |
| 
 | |
|                 ++*count;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(buffer, &ctx->buffers, struct hlsl_buffer, entry)
 | |
|     {
 | |
|         if (!buffer->reg.allocated)
 | |
|             continue;
 | |
| 
 | |
|         if (!(hlsl_array_reserve(ctx, (void **)&extern_resources,
 | |
|                 &capacity, *count + 1, sizeof(*extern_resources))))
 | |
|         {
 | |
|             sm4_free_extern_resources(extern_resources, *count);
 | |
|             *count = 0;
 | |
|             return NULL;
 | |
|         }
 | |
| 
 | |
|         if (!(name = hlsl_strdup(ctx, buffer->name)))
 | |
|         {
 | |
|             sm4_free_extern_resources(extern_resources, *count);
 | |
|             *count = 0;
 | |
|             return NULL;
 | |
|         }
 | |
| 
 | |
|         extern_resources[*count].var = NULL;
 | |
|         extern_resources[*count].buffer = buffer;
 | |
| 
 | |
|         extern_resources[*count].name = name;
 | |
|         extern_resources[*count].is_user_packed = !!buffer->reservation.reg_type;
 | |
| 
 | |
|         extern_resources[*count].component_type = NULL;
 | |
| 
 | |
|         extern_resources[*count].regset = HLSL_REGSET_NUMERIC;
 | |
|         extern_resources[*count].id = buffer->reg.id;
 | |
|         extern_resources[*count].space = buffer->reg.space;
 | |
|         extern_resources[*count].index = buffer->reg.index;
 | |
|         extern_resources[*count].bind_count = 1;
 | |
|         extern_resources[*count].loc = buffer->loc;
 | |
| 
 | |
|         ++*count;
 | |
|     }
 | |
| 
 | |
|     if (extern_resources)
 | |
|         qsort(extern_resources, *count, sizeof(*extern_resources), sm4_compare_extern_resources);
 | |
| 
 | |
|     return extern_resources;
 | |
| }
 | |
| 
 | |
| static void generate_vsir_scan_required_features(struct hlsl_ctx *ctx, struct vsir_program *program)
 | |
| {
 | |
|     struct extern_resource *extern_resources;
 | |
|     unsigned int extern_resources_count;
 | |
| 
 | |
|     extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
 | |
|     for (unsigned int i = 0; i < extern_resources_count; ++i)
 | |
|     {
 | |
|         if (extern_resources[i].component_type && extern_resources[i].component_type->e.resource.rasteriser_ordered)
 | |
|             program->features.rovs = true;
 | |
|     }
 | |
|     sm4_free_extern_resources(extern_resources, extern_resources_count);
 | |
| 
 | |
|     /* FIXME: We also emit code that should require UAVS_AT_EVERY_STAGE,
 | |
|      * STENCIL_REF, and TYPED_UAV_LOAD_ADDITIONAL_FORMATS. */
 | |
| }
 | |
| 
 | |
| static void generate_vsir_scan_global_flags(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     const struct vkd3d_shader_version *version = &program->shader_version;
 | |
|     struct extern_resource *extern_resources;
 | |
|     unsigned int extern_resources_count, i;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
 | |
| 
 | |
|     if (version->major == 4)
 | |
|     {
 | |
|         for (i = 0; i < extern_resources_count; ++i)
 | |
|         {
 | |
|             const struct extern_resource *resource = &extern_resources[i];
 | |
|             const struct hlsl_type *type = resource->component_type;
 | |
| 
 | |
|             if (type && type->class == HLSL_CLASS_TEXTURE && type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
 | |
|             {
 | |
|                 program->global_flags |= VKD3DSGF_ENABLE_RAW_AND_STRUCTURED_BUFFERS;
 | |
|                 break;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     sm4_free_extern_resources(extern_resources, extern_resources_count);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &entry_func->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|     {
 | |
|         const struct hlsl_type *type = var->data_type;
 | |
| 
 | |
|         if (hlsl_type_is_primitive_array(type))
 | |
|             type = var->data_type->e.array.type;
 | |
| 
 | |
|         /* Note that it doesn't matter if the semantic is unused or doesn't
 | |
|          * generate a signature element (e.g. SV_DispatchThreadID). */
 | |
|         if ((var->is_input_semantic || var->is_output_semantic)
 | |
|                 && (type->is_minimum_precision || hlsl_type_is_minimum_precision(type)))
 | |
|         {
 | |
|             program->global_flags |= VKD3DSGF_ENABLE_MINIMUM_PRECISION;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
|     /* FIXME: We also need to check for minimum-precision uniforms and local
 | |
|      * variable arithmetic. */
 | |
| 
 | |
|     if (entry_func->early_depth_test && vkd3d_shader_ver_ge(version, 5, 0))
 | |
|         program->global_flags |= VKD3DSGF_FORCE_EARLY_DEPTH_STENCIL;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_add_dcl_constant_buffer(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_buffer *cbuffer)
 | |
| {
 | |
|     unsigned int array_first = cbuffer->reg.index;
 | |
|     unsigned int array_last = cbuffer->reg.index; /* FIXME: array end. */
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &cbuffer->loc, VKD3DSIH_DCL_CONSTANT_BUFFER, 0, 0)))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     ins->declaration.cb.size = cbuffer->size;
 | |
| 
 | |
|     src_param = &ins->declaration.cb.src;
 | |
|     vsir_src_param_init(src_param, VKD3DSPR_CONSTBUFFER, VKD3D_DATA_FLOAT, 0);
 | |
|     src_param->reg.dimension = VSIR_DIMENSION_VEC4;
 | |
|     src_param->swizzle = VKD3D_SHADER_NO_SWIZZLE;
 | |
| 
 | |
|     ins->declaration.cb.range.space = cbuffer->reg.space;
 | |
|     ins->declaration.cb.range.first = array_first;
 | |
|     ins->declaration.cb.range.last = array_last;
 | |
| 
 | |
|     src_param->reg.idx[0].offset = cbuffer->reg.id;
 | |
|     src_param->reg.idx[1].offset = array_first;
 | |
|     src_param->reg.idx[2].offset = array_last;
 | |
|     src_param->reg.idx_count = 3;
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_add_dcl_sampler(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct extern_resource *resource)
 | |
| {
 | |
|     struct vkd3d_shader_src_param *src_param;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     unsigned int i;
 | |
| 
 | |
|     VKD3D_ASSERT(resource->regset == HLSL_REGSET_SAMPLERS);
 | |
|     VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1);
 | |
| 
 | |
|     for (i = 0; i < resource->bind_count; ++i)
 | |
|     {
 | |
|         unsigned int array_first = resource->index + i;
 | |
|         unsigned int array_last = resource->index + i; /* FIXME: array end. */
 | |
| 
 | |
|         if (resource->var && !resource->var->objects_usage[HLSL_REGSET_SAMPLERS][i].used)
 | |
|             continue;
 | |
| 
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, VKD3DSIH_DCL_SAMPLER, 0, 0)))
 | |
|         {
 | |
|             ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|             return;
 | |
|         }
 | |
| 
 | |
|         if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_COMPARISON)
 | |
|             ins->flags |= VKD3DSI_SAMPLER_COMPARISON_MODE;
 | |
| 
 | |
|         src_param = &ins->declaration.sampler.src;
 | |
|         vsir_src_param_init(src_param, VKD3DSPR_SAMPLER, VKD3D_DATA_UNUSED, 0);
 | |
| 
 | |
|         ins->declaration.sampler.range.first = array_first;
 | |
|         ins->declaration.sampler.range.last = array_last;
 | |
|         ins->declaration.sampler.range.space = resource->space;
 | |
| 
 | |
|         src_param->reg.idx[0].offset = resource->id;
 | |
|         src_param->reg.idx[1].offset = array_first;
 | |
|         src_param->reg.idx[2].offset = array_last;
 | |
|         src_param->reg.idx_count = 3;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static enum vkd3d_shader_resource_type sm4_generate_vsir_get_resource_type(const struct hlsl_type *type)
 | |
| {
 | |
|     switch (type->sampler_dim)
 | |
|     {
 | |
|         case HLSL_SAMPLER_DIM_1D:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_1D;
 | |
|         case HLSL_SAMPLER_DIM_2D:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_2D;
 | |
|         case HLSL_SAMPLER_DIM_3D:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_3D;
 | |
|         case HLSL_SAMPLER_DIM_CUBE:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_CUBE;
 | |
|         case HLSL_SAMPLER_DIM_1DARRAY:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_1DARRAY;
 | |
|         case HLSL_SAMPLER_DIM_2DARRAY:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_2DARRAY;
 | |
|         case HLSL_SAMPLER_DIM_2DMS:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_2DMS;
 | |
|         case HLSL_SAMPLER_DIM_2DMSARRAY:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_2DMSARRAY;
 | |
|         case HLSL_SAMPLER_DIM_CUBEARRAY:
 | |
|             return VKD3D_SHADER_RESOURCE_TEXTURE_CUBEARRAY;
 | |
|         case HLSL_SAMPLER_DIM_BUFFER:
 | |
|         case HLSL_SAMPLER_DIM_RAW_BUFFER:
 | |
|         case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
 | |
|             return VKD3D_SHADER_RESOURCE_BUFFER;
 | |
|         default:
 | |
|             vkd3d_unreachable();
 | |
|     }
 | |
| }
 | |
| 
 | |
| static enum vkd3d_data_type sm4_generate_vsir_get_format_type(const struct hlsl_type *type)
 | |
| {
 | |
|     const struct hlsl_type *format = type->e.resource.format;
 | |
| 
 | |
|     switch (format->e.numeric.type)
 | |
|     {
 | |
|         case HLSL_TYPE_DOUBLE:
 | |
|             return VKD3D_DATA_DOUBLE;
 | |
| 
 | |
|         case HLSL_TYPE_FLOAT:
 | |
|         case HLSL_TYPE_HALF:
 | |
|             if (format->modifiers & HLSL_MODIFIER_UNORM)
 | |
|                 return VKD3D_DATA_UNORM;
 | |
|             if (format->modifiers & HLSL_MODIFIER_SNORM)
 | |
|                 return VKD3D_DATA_SNORM;
 | |
|             return VKD3D_DATA_FLOAT;
 | |
| 
 | |
|         case HLSL_TYPE_INT:
 | |
|             return VKD3D_DATA_INT;
 | |
| 
 | |
|         case HLSL_TYPE_BOOL:
 | |
|         case HLSL_TYPE_MIN16UINT:
 | |
|         case HLSL_TYPE_UINT:
 | |
|             return VKD3D_DATA_UINT;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_add_dcl_texture(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct extern_resource *resource,
 | |
|         bool uav)
 | |
| {
 | |
|     enum hlsl_regset regset = uav ? HLSL_REGSET_UAVS : HLSL_REGSET_TEXTURES;
 | |
|     struct vkd3d_shader_structured_resource *structured_resource;
 | |
|     struct vkd3d_shader_dst_param *dst_param;
 | |
|     struct vkd3d_shader_semantic *semantic;
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
|     struct hlsl_type *component_type;
 | |
|     enum vkd3d_shader_opcode opcode;
 | |
|     bool multisampled;
 | |
|     unsigned int i, j;
 | |
| 
 | |
|     VKD3D_ASSERT(resource->regset == regset);
 | |
|     VKD3D_ASSERT(hlsl_version_lt(ctx, 5, 1) || resource->bind_count == 1);
 | |
| 
 | |
|     component_type = resource->component_type;
 | |
| 
 | |
|     for (i = 0; i < resource->bind_count; ++i)
 | |
|     {
 | |
|         unsigned int array_first = resource->index + i;
 | |
|         unsigned int array_last = resource->index + i; /* FIXME: array end. */
 | |
| 
 | |
|         if (resource->var && !resource->var->objects_usage[regset][i].used)
 | |
|             continue;
 | |
| 
 | |
|         if (uav)
 | |
|         {
 | |
|             switch (component_type->sampler_dim)
 | |
|             {
 | |
|                 case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
 | |
|                     opcode = VKD3DSIH_DCL_UAV_STRUCTURED;
 | |
|                     break;
 | |
|                 case HLSL_SAMPLER_DIM_RAW_BUFFER:
 | |
|                     opcode = VKD3DSIH_DCL_UAV_RAW;
 | |
|                     break;
 | |
|                 default:
 | |
|                     opcode = VKD3DSIH_DCL_UAV_TYPED;
 | |
|                     break;
 | |
|             }
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             switch (component_type->sampler_dim)
 | |
|             {
 | |
|                 case HLSL_SAMPLER_DIM_RAW_BUFFER:
 | |
|                     opcode = VKD3DSIH_DCL_RESOURCE_RAW;
 | |
|                     break;
 | |
|                 default:
 | |
|                     opcode = VKD3DSIH_DCL;
 | |
|                     break;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         if (!(ins = generate_vsir_add_program_instruction(ctx, program, &resource->loc, opcode, 0, 0)))
 | |
|         {
 | |
|             ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|             return;
 | |
|         }
 | |
|         semantic = &ins->declaration.semantic;
 | |
|         structured_resource = &ins->declaration.structured_resource;
 | |
|         dst_param = &semantic->resource.reg;
 | |
|         vsir_dst_param_init(dst_param, uav ? VKD3DSPR_UAV : VKD3DSPR_RESOURCE, VKD3D_DATA_UNUSED, 0);
 | |
| 
 | |
|         if (uav && component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
 | |
|             structured_resource->byte_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC];
 | |
|         if (uav && component_type->e.resource.rasteriser_ordered)
 | |
|             ins->flags = VKD3DSUF_RASTERISER_ORDERED_VIEW;
 | |
| 
 | |
|         multisampled = component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMS
 | |
|                 || component_type->sampler_dim == HLSL_SAMPLER_DIM_2DMSARRAY;
 | |
| 
 | |
|         if (!hlsl_version_ge(ctx, 4, 1) && multisampled && !component_type->sample_count)
 | |
|         {
 | |
|             hlsl_error(ctx, &resource->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                     "Multisampled texture object declaration needs sample count for profile %u.%u.",
 | |
|                     ctx->profile->major_version, ctx->profile->minor_version);
 | |
|         }
 | |
| 
 | |
|         for (j = 0; j < 4; ++j)
 | |
|             semantic->resource_data_type[j] = sm4_generate_vsir_get_format_type(component_type);
 | |
| 
 | |
|         semantic->resource.range.first = array_first;
 | |
|         semantic->resource.range.last = array_last;
 | |
|         semantic->resource.range.space = resource->space;
 | |
| 
 | |
|         dst_param->reg.idx[0].offset = resource->id;
 | |
|         dst_param->reg.idx[1].offset = array_first;
 | |
|         dst_param->reg.idx[2].offset = array_last;
 | |
|         dst_param->reg.idx_count = 3;
 | |
| 
 | |
|         ins->resource_type = sm4_generate_vsir_get_resource_type(resource->component_type);
 | |
|         if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_RAW_BUFFER)
 | |
|             ins->raw = true;
 | |
|         if (resource->component_type->sampler_dim == HLSL_SAMPLER_DIM_STRUCTURED_BUFFER)
 | |
|         {
 | |
|             ins->structured = true;
 | |
|             ins->resource_stride = 4 * component_type->e.resource.format->reg_size[HLSL_REGSET_NUMERIC];
 | |
|         }
 | |
| 
 | |
|         if (multisampled)
 | |
|             semantic->sample_count = component_type->sample_count;
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm4_generate_vsir_add_dcl_stream(struct hlsl_ctx *ctx,
 | |
|         struct vsir_program *program, const struct hlsl_ir_var *var)
 | |
| {
 | |
|     struct vkd3d_shader_instruction *ins;
 | |
| 
 | |
|     if (!(ins = generate_vsir_add_program_instruction(ctx, program, &var->loc, VKD3DSIH_DCL_STREAM, 0, 1)))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     vsir_src_param_init(&ins->src[0], VKD3DSPR_STREAM, VKD3D_DATA_OPAQUE, 1);
 | |
|     ins->src[0].reg.dimension = VSIR_DIMENSION_NONE;
 | |
|     ins->src[0].reg.idx[0].offset = var->regs[HLSL_REGSET_STREAM_OUTPUTS].index;
 | |
| }
 | |
| 
 | |
| /* OBJECTIVE: Translate all the information from ctx and entry_func to the
 | |
|  * vsir_program, so it can be used as input to tpf_compile() without relying
 | |
|  * on ctx and entry_func. */
 | |
| static void sm4_generate_vsir(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *func,
 | |
|         uint64_t config_flags, struct vsir_program *program)
 | |
| {
 | |
|     struct vkd3d_shader_version version = {0};
 | |
|     struct extern_resource *extern_resources;
 | |
|     unsigned int extern_resources_count;
 | |
|     const struct hlsl_buffer *cbuffer;
 | |
| 
 | |
|     version.major = ctx->profile->major_version;
 | |
|     version.minor = ctx->profile->minor_version;
 | |
|     version.type = ctx->profile->type;
 | |
| 
 | |
|     if (!vsir_program_init(program, NULL, &version, 0, VSIR_CF_STRUCTURED, VSIR_NORMALISED_SM4))
 | |
|     {
 | |
|         ctx->result = VKD3D_ERROR_OUT_OF_MEMORY;
 | |
|         return;
 | |
|     }
 | |
| 
 | |
|     generate_vsir_signature(ctx, program, func);
 | |
|     if (version.type == VKD3D_SHADER_TYPE_HULL)
 | |
|         generate_vsir_signature(ctx, program, ctx->patch_constant_func);
 | |
| 
 | |
|     if (version.type == VKD3D_SHADER_TYPE_COMPUTE)
 | |
|     {
 | |
|         program->thread_group_size.x = ctx->thread_count[0];
 | |
|         program->thread_group_size.y = ctx->thread_count[1];
 | |
|         program->thread_group_size.z = ctx->thread_count[2];
 | |
|     }
 | |
|     else if (version.type == VKD3D_SHADER_TYPE_HULL)
 | |
|     {
 | |
|         program->input_control_point_count = ctx->input_control_point_count == UINT_MAX
 | |
|                 ? 1 : ctx->input_control_point_count;
 | |
|         program->output_control_point_count = ctx->output_control_point_count;
 | |
|         program->tess_domain = ctx->domain;
 | |
|         program->tess_partitioning = ctx->partitioning;
 | |
|         program->tess_output_primitive = ctx->output_primitive;
 | |
|     }
 | |
|     else if (version.type == VKD3D_SHADER_TYPE_DOMAIN)
 | |
|     {
 | |
|         program->input_control_point_count = ctx->input_control_point_count == UINT_MAX
 | |
|                 ? 0 : ctx->input_control_point_count;
 | |
|         program->tess_domain = ctx->domain;
 | |
|     }
 | |
|     else if (version.type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|     {
 | |
|         program->input_control_point_count = ctx->input_control_point_count;
 | |
|         program->input_primitive = ctx->input_primitive_type;
 | |
|         program->output_topology = ctx->output_topology_type;
 | |
|         program->vertices_out_count = ctx->max_vertex_count;
 | |
|     }
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
 | |
|     {
 | |
|         if (cbuffer->reg.allocated)
 | |
|             sm4_generate_vsir_add_dcl_constant_buffer(ctx, program, cbuffer);
 | |
|     }
 | |
| 
 | |
|     extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
 | |
|     for (unsigned int i = 0; i < extern_resources_count; ++i)
 | |
|     {
 | |
|         const struct extern_resource *resource = &extern_resources[i];
 | |
| 
 | |
|         if (resource->regset == HLSL_REGSET_SAMPLERS)
 | |
|             sm4_generate_vsir_add_dcl_sampler(ctx, program, resource);
 | |
|         else if (resource->regset == HLSL_REGSET_TEXTURES)
 | |
|             sm4_generate_vsir_add_dcl_texture(ctx, program, resource, false);
 | |
|         else if (resource->regset == HLSL_REGSET_UAVS)
 | |
|             sm4_generate_vsir_add_dcl_texture(ctx, program, resource, true);
 | |
|     }
 | |
|     sm4_free_extern_resources(extern_resources, extern_resources_count);
 | |
| 
 | |
|     if (version.type == VKD3D_SHADER_TYPE_GEOMETRY && version.major >= 5)
 | |
|     {
 | |
|         const struct hlsl_ir_var *var;
 | |
| 
 | |
|         LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|         {
 | |
|             if (var->bind_count[HLSL_REGSET_STREAM_OUTPUTS])
 | |
|                 sm4_generate_vsir_add_dcl_stream(ctx, program, var);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (version.type == VKD3D_SHADER_TYPE_HULL)
 | |
|         generate_vsir_add_program_instruction(ctx, program,
 | |
|                 &ctx->patch_constant_func->loc, VKD3DSIH_HS_CONTROL_POINT_PHASE, 0, 0);
 | |
|     sm4_generate_vsir_add_function(ctx, func, config_flags, program);
 | |
|     if (version.type == VKD3D_SHADER_TYPE_HULL)
 | |
|     {
 | |
|         generate_vsir_add_program_instruction(ctx, program,
 | |
|                 &ctx->patch_constant_func->loc, VKD3DSIH_HS_FORK_PHASE, 0, 0);
 | |
|         sm4_generate_vsir_add_function(ctx, ctx->patch_constant_func, config_flags, program);
 | |
|     }
 | |
| 
 | |
|     generate_vsir_scan_required_features(ctx, program);
 | |
|     generate_vsir_scan_global_flags(ctx, program, func);
 | |
| }
 | |
| 
 | |
| /* For some reason, for matrices, values from default value initializers end
 | |
|  * up in different components than from regular initializers. Default value
 | |
|  * initializers fill the matrix in vertical reading order
 | |
|  * (left-to-right top-to-bottom) instead of regular reading order
 | |
|  * (top-to-bottom left-to-right), so they have to be adjusted. An exception is
 | |
|  * that the order of matrix initializers for function parameters are row-major
 | |
|  * (top-to-bottom left-to-right). */
 | |
| static unsigned int get_component_index_from_default_initializer_index(struct hlsl_type *type, unsigned int index)
 | |
| {
 | |
|     unsigned int element_comp_count, element, x, y, i;
 | |
|     unsigned int base = 0;
 | |
| 
 | |
|     switch (type->class)
 | |
|     {
 | |
|         case HLSL_CLASS_MATRIX:
 | |
|             x = index / type->e.numeric.dimy;
 | |
|             y = index % type->e.numeric.dimy;
 | |
|             return y * type->e.numeric.dimx + x;
 | |
| 
 | |
|         case HLSL_CLASS_ARRAY:
 | |
|             element_comp_count = hlsl_type_component_count(type->e.array.type);
 | |
|             element = index / element_comp_count;
 | |
|             base = element * element_comp_count;
 | |
|             return base + get_component_index_from_default_initializer_index(type->e.array.type, index - base);
 | |
| 
 | |
|         case HLSL_CLASS_STRUCT:
 | |
|             for (i = 0; i < type->e.record.field_count; ++i)
 | |
|             {
 | |
|                 struct hlsl_type *field_type = type->e.record.fields[i].type;
 | |
| 
 | |
|                 element_comp_count = hlsl_type_component_count(field_type);
 | |
|                 if (index - base < element_comp_count)
 | |
|                     return base + get_component_index_from_default_initializer_index(field_type, index - base);
 | |
|                 base += element_comp_count;
 | |
|             }
 | |
|             break;
 | |
| 
 | |
|         default:
 | |
|             return index;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static D3D_SRV_DIMENSION sm4_rdef_resource_dimension(const struct hlsl_type *type)
 | |
| {
 | |
|     switch (type->sampler_dim)
 | |
|     {
 | |
|         case HLSL_SAMPLER_DIM_1D:
 | |
|             return D3D_SRV_DIMENSION_TEXTURE1D;
 | |
|         case HLSL_SAMPLER_DIM_2D:
 | |
|             return D3D_SRV_DIMENSION_TEXTURE2D;
 | |
|         case HLSL_SAMPLER_DIM_3D:
 | |
|             return D3D_SRV_DIMENSION_TEXTURE3D;
 | |
|         case HLSL_SAMPLER_DIM_CUBE:
 | |
|             return D3D_SRV_DIMENSION_TEXTURECUBE;
 | |
|         case HLSL_SAMPLER_DIM_1DARRAY:
 | |
|             return D3D_SRV_DIMENSION_TEXTURE1DARRAY;
 | |
|         case HLSL_SAMPLER_DIM_2DARRAY:
 | |
|             return D3D_SRV_DIMENSION_TEXTURE2DARRAY;
 | |
|         case HLSL_SAMPLER_DIM_2DMS:
 | |
|             return D3D_SRV_DIMENSION_TEXTURE2DMS;
 | |
|         case HLSL_SAMPLER_DIM_2DMSARRAY:
 | |
|             return D3D_SRV_DIMENSION_TEXTURE2DMSARRAY;
 | |
|         case HLSL_SAMPLER_DIM_CUBEARRAY:
 | |
|             return D3D_SRV_DIMENSION_TEXTURECUBEARRAY;
 | |
|         case HLSL_SAMPLER_DIM_BUFFER:
 | |
|         case HLSL_SAMPLER_DIM_RAW_BUFFER:
 | |
|         case HLSL_SAMPLER_DIM_STRUCTURED_BUFFER:
 | |
|             return D3D_SRV_DIMENSION_BUFFER;
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static enum D3D_RESOURCE_RETURN_TYPE sm4_data_type(const struct hlsl_type *type)
 | |
| {
 | |
|     const struct hlsl_type *format = type->e.resource.format;
 | |
| 
 | |
|     switch (format->e.numeric.type)
 | |
|     {
 | |
|         case HLSL_TYPE_DOUBLE:
 | |
|             return D3D_RETURN_TYPE_DOUBLE;
 | |
| 
 | |
|         case HLSL_TYPE_FLOAT:
 | |
|         case HLSL_TYPE_HALF:
 | |
|             if (format->modifiers & HLSL_MODIFIER_UNORM)
 | |
|                 return D3D_RETURN_TYPE_UNORM;
 | |
|             if (format->modifiers & HLSL_MODIFIER_SNORM)
 | |
|                 return D3D_RETURN_TYPE_SNORM;
 | |
|             return D3D_RETURN_TYPE_FLOAT;
 | |
| 
 | |
|         case HLSL_TYPE_INT:
 | |
|             return D3D_RETURN_TYPE_SINT;
 | |
|             break;
 | |
| 
 | |
|         case HLSL_TYPE_BOOL:
 | |
|         case HLSL_TYPE_MIN16UINT:
 | |
|         case HLSL_TYPE_UINT:
 | |
|             return D3D_RETURN_TYPE_UINT;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static D3D_SHADER_INPUT_TYPE sm4_resource_type(const struct hlsl_type *type)
 | |
| {
 | |
|     switch (type->class)
 | |
|     {
 | |
|         case HLSL_CLASS_SAMPLER:
 | |
|             return D3D_SIT_SAMPLER;
 | |
|         case HLSL_CLASS_TEXTURE:
 | |
|             return D3D_SIT_TEXTURE;
 | |
|         case HLSL_CLASS_UAV:
 | |
|             return D3D_SIT_UAV_RWTYPED;
 | |
|         default:
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static D3D_SHADER_VARIABLE_CLASS sm4_class(const struct hlsl_type *type)
 | |
| {
 | |
|     switch (type->class)
 | |
|     {
 | |
|         case HLSL_CLASS_MATRIX:
 | |
|             VKD3D_ASSERT(type->modifiers & HLSL_MODIFIERS_MAJORITY_MASK);
 | |
|             if (type->modifiers & HLSL_MODIFIER_COLUMN_MAJOR)
 | |
|                 return D3D_SVC_MATRIX_COLUMNS;
 | |
|             else
 | |
|                 return D3D_SVC_MATRIX_ROWS;
 | |
|         case HLSL_CLASS_SCALAR:
 | |
|             return D3D_SVC_SCALAR;
 | |
|         case HLSL_CLASS_VECTOR:
 | |
|             return D3D_SVC_VECTOR;
 | |
| 
 | |
|         case HLSL_CLASS_ARRAY:
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_STATE:
 | |
|         case HLSL_CLASS_DEPTH_STENCIL_VIEW:
 | |
|         case HLSL_CLASS_EFFECT_GROUP:
 | |
|         case HLSL_CLASS_ERROR:
 | |
|         case HLSL_CLASS_STRUCT:
 | |
|         case HLSL_CLASS_PASS:
 | |
|         case HLSL_CLASS_PIXEL_SHADER:
 | |
|         case HLSL_CLASS_RASTERIZER_STATE:
 | |
|         case HLSL_CLASS_RENDER_TARGET_VIEW:
 | |
|         case HLSL_CLASS_SAMPLER:
 | |
|         case HLSL_CLASS_STRING:
 | |
|         case HLSL_CLASS_TECHNIQUE:
 | |
|         case HLSL_CLASS_TEXTURE:
 | |
|         case HLSL_CLASS_UAV:
 | |
|         case HLSL_CLASS_VERTEX_SHADER:
 | |
|         case HLSL_CLASS_VOID:
 | |
|         case HLSL_CLASS_CONSTANT_BUFFER:
 | |
|         case HLSL_CLASS_COMPUTE_SHADER:
 | |
|         case HLSL_CLASS_DOMAIN_SHADER:
 | |
|         case HLSL_CLASS_HULL_SHADER:
 | |
|         case HLSL_CLASS_GEOMETRY_SHADER:
 | |
|         case HLSL_CLASS_BLEND_STATE:
 | |
|         case HLSL_CLASS_STREAM_OUTPUT:
 | |
|         case HLSL_CLASS_NULL:
 | |
|             break;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static D3D_SHADER_VARIABLE_TYPE sm4_base_type(const struct hlsl_type *type)
 | |
| {
 | |
|     switch (type->e.numeric.type)
 | |
|     {
 | |
|         case HLSL_TYPE_BOOL:
 | |
|             return D3D_SVT_BOOL;
 | |
|         case HLSL_TYPE_DOUBLE:
 | |
|             return D3D_SVT_DOUBLE;
 | |
|         case HLSL_TYPE_FLOAT:
 | |
|         case HLSL_TYPE_HALF:
 | |
|             return D3D_SVT_FLOAT;
 | |
|         case HLSL_TYPE_INT:
 | |
|             return D3D_SVT_INT;
 | |
|         case HLSL_TYPE_UINT:
 | |
|             return D3D_SVT_UINT;
 | |
|         case HLSL_TYPE_MIN16UINT:
 | |
|             return D3D_SVT_MIN16UINT;
 | |
|     }
 | |
| 
 | |
|     vkd3d_unreachable();
 | |
| }
 | |
| 
 | |
| static void write_sm4_type(struct hlsl_ctx *ctx, struct vkd3d_bytecode_buffer *buffer, struct hlsl_type *type)
 | |
| {
 | |
|     const struct hlsl_type *array_type = hlsl_get_multiarray_element_type(type);
 | |
|     const char *name = array_type->name ? array_type->name : "<unnamed>";
 | |
|     const struct hlsl_profile_info *profile = ctx->profile;
 | |
|     unsigned int array_size = 0;
 | |
|     size_t name_offset = 0;
 | |
|     size_t i;
 | |
| 
 | |
|     if (type->bytecode_offset)
 | |
|         return;
 | |
| 
 | |
|     if (profile->major_version >= 5)
 | |
|         name_offset = put_string(buffer, name);
 | |
| 
 | |
|     if (type->class == HLSL_CLASS_ARRAY)
 | |
|         array_size = hlsl_get_multiarray_size(type);
 | |
| 
 | |
|     if (array_type->class == HLSL_CLASS_STRUCT)
 | |
|     {
 | |
|         unsigned int field_count = 0;
 | |
|         size_t fields_offset = 0;
 | |
| 
 | |
|         for (i = 0; i < array_type->e.record.field_count; ++i)
 | |
|         {
 | |
|             struct hlsl_struct_field *field = &array_type->e.record.fields[i];
 | |
| 
 | |
|             if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
 | |
|                 continue;
 | |
| 
 | |
|             field->name_bytecode_offset = put_string(buffer, field->name);
 | |
|             write_sm4_type(ctx, buffer, field->type);
 | |
|             ++field_count;
 | |
|         }
 | |
| 
 | |
|         fields_offset = bytecode_align(buffer);
 | |
| 
 | |
|         for (i = 0; i < array_type->e.record.field_count; ++i)
 | |
|         {
 | |
|             struct hlsl_struct_field *field = &array_type->e.record.fields[i];
 | |
| 
 | |
|             if (!field->type->reg_size[HLSL_REGSET_NUMERIC])
 | |
|                 continue;
 | |
| 
 | |
|             put_u32(buffer, field->name_bytecode_offset);
 | |
|             put_u32(buffer, field->type->bytecode_offset);
 | |
|             put_u32(buffer, field->reg_offset[HLSL_REGSET_NUMERIC] * sizeof(float));
 | |
|         }
 | |
|         type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(D3D_SVC_STRUCT, D3D_SVT_VOID));
 | |
|         put_u32(buffer, vkd3d_make_u32(1, hlsl_type_component_count(array_type)));
 | |
|         put_u32(buffer, vkd3d_make_u32(array_size, field_count));
 | |
|         put_u32(buffer, fields_offset);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         VKD3D_ASSERT(array_type->class <= HLSL_CLASS_LAST_NUMERIC);
 | |
|         type->bytecode_offset = put_u32(buffer, vkd3d_make_u32(sm4_class(array_type), sm4_base_type(array_type)));
 | |
|         put_u32(buffer, vkd3d_make_u32(array_type->e.numeric.dimy, array_type->e.numeric.dimx));
 | |
|         put_u32(buffer, vkd3d_make_u32(array_size, 0));
 | |
|         put_u32(buffer, 1);
 | |
|     }
 | |
| 
 | |
|     if (profile->major_version >= 5)
 | |
|     {
 | |
|         put_u32(buffer, 0); /* FIXME: unknown */
 | |
|         put_u32(buffer, 0); /* FIXME: unknown */
 | |
|         put_u32(buffer, 0); /* FIXME: unknown */
 | |
|         put_u32(buffer, 0); /* FIXME: unknown */
 | |
|         put_u32(buffer, name_offset);
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void sm4_generate_rdef(struct hlsl_ctx *ctx, struct vkd3d_shader_code *rdef)
 | |
| {
 | |
|     uint32_t binding_desc_size = (hlsl_version_ge(ctx, 5, 1) ? 10 : 8) * sizeof(uint32_t);
 | |
|     size_t cbuffers_offset, resources_offset, creator_offset, string_offset;
 | |
|     unsigned int cbuffer_count = 0, extern_resources_count, i, j;
 | |
|     size_t cbuffer_position, resource_position, creator_position;
 | |
|     const struct hlsl_profile_info *profile = ctx->profile;
 | |
|     struct vkd3d_bytecode_buffer buffer = {0};
 | |
|     struct extern_resource *extern_resources;
 | |
|     const struct hlsl_buffer *cbuffer;
 | |
|     const struct hlsl_ir_var *var;
 | |
| 
 | |
|     static const uint16_t target_types[] =
 | |
|     {
 | |
|         0xffff, /* PIXEL */
 | |
|         0xfffe, /* VERTEX */
 | |
|         0x4753, /* GEOMETRY */
 | |
|         0x4853, /* HULL */
 | |
|         0x4453, /* DOMAIN */
 | |
|         0x4353, /* COMPUTE */
 | |
|     };
 | |
| 
 | |
|     extern_resources = sm4_get_extern_resources(ctx, &extern_resources_count);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
 | |
|     {
 | |
|         if (cbuffer->reg.allocated)
 | |
|             ++cbuffer_count;
 | |
|     }
 | |
| 
 | |
|     put_u32(&buffer, cbuffer_count);
 | |
|     cbuffer_position = put_u32(&buffer, 0);
 | |
|     put_u32(&buffer, extern_resources_count);
 | |
|     resource_position = put_u32(&buffer, 0);
 | |
|     put_u32(&buffer, vkd3d_make_u32(vkd3d_make_u16(profile->minor_version, profile->major_version),
 | |
|             target_types[profile->type]));
 | |
|     put_u32(&buffer, 0); /* FIXME: compilation flags */
 | |
|     creator_position = put_u32(&buffer, 0);
 | |
| 
 | |
|     if (profile->major_version >= 5)
 | |
|     {
 | |
|         put_u32(&buffer, hlsl_version_ge(ctx, 5, 1) ? TAG_RD11_REVERSE : TAG_RD11);
 | |
|         put_u32(&buffer, 15 * sizeof(uint32_t)); /* size of RDEF header including this header */
 | |
|         put_u32(&buffer, 6 * sizeof(uint32_t)); /* size of buffer desc */
 | |
|         put_u32(&buffer, binding_desc_size); /* size of binding desc */
 | |
|         put_u32(&buffer, 10 * sizeof(uint32_t)); /* size of variable desc */
 | |
|         put_u32(&buffer, 9 * sizeof(uint32_t)); /* size of type desc */
 | |
|         put_u32(&buffer, 3 * sizeof(uint32_t)); /* size of member desc */
 | |
|         put_u32(&buffer, 0); /* unknown; possibly a null terminator */
 | |
|     }
 | |
| 
 | |
|     /* Bound resources. */
 | |
| 
 | |
|     resources_offset = bytecode_align(&buffer);
 | |
|     set_u32(&buffer, resource_position, resources_offset);
 | |
| 
 | |
|     for (i = 0; i < extern_resources_count; ++i)
 | |
|     {
 | |
|         const struct extern_resource *resource = &extern_resources[i];
 | |
|         uint32_t flags = 0;
 | |
| 
 | |
|         if (resource->is_user_packed)
 | |
|             flags |= D3D_SIF_USERPACKED;
 | |
| 
 | |
|         put_u32(&buffer, 0); /* name */
 | |
|         if (resource->buffer)
 | |
|             put_u32(&buffer, resource->buffer->type == HLSL_BUFFER_CONSTANT ? D3D_SIT_CBUFFER : D3D_SIT_TBUFFER);
 | |
|         else
 | |
|             put_u32(&buffer, sm4_resource_type(resource->component_type));
 | |
|         if (resource->regset == HLSL_REGSET_TEXTURES || resource->regset == HLSL_REGSET_UAVS)
 | |
|         {
 | |
|             unsigned int dimx = resource->component_type->e.resource.format->e.numeric.dimx;
 | |
| 
 | |
|             put_u32(&buffer, sm4_data_type(resource->component_type));
 | |
|             put_u32(&buffer, sm4_rdef_resource_dimension(resource->component_type));
 | |
|             put_u32(&buffer, ~0u); /* FIXME: multisample count */
 | |
|             flags |= (dimx - 1) << VKD3D_SM4_SIF_TEXTURE_COMPONENTS_SHIFT;
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             put_u32(&buffer, 0);
 | |
|             put_u32(&buffer, 0);
 | |
|             put_u32(&buffer, 0);
 | |
|         }
 | |
|         put_u32(&buffer, resource->index);
 | |
|         put_u32(&buffer, resource->bind_count);
 | |
|         put_u32(&buffer, flags);
 | |
| 
 | |
|         if (hlsl_version_ge(ctx, 5, 1))
 | |
|         {
 | |
|             put_u32(&buffer, resource->space);
 | |
|             put_u32(&buffer, resource->id);
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     for (i = 0; i < extern_resources_count; ++i)
 | |
|     {
 | |
|         const struct extern_resource *resource = &extern_resources[i];
 | |
| 
 | |
|         string_offset = put_string(&buffer, resource->name);
 | |
|         set_u32(&buffer, resources_offset + i * binding_desc_size, string_offset);
 | |
|     }
 | |
| 
 | |
|     /* Buffers. */
 | |
| 
 | |
|     cbuffers_offset = bytecode_align(&buffer);
 | |
|     set_u32(&buffer, cbuffer_position, cbuffers_offset);
 | |
|     LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
 | |
|     {
 | |
|         unsigned int var_count = 0;
 | |
| 
 | |
|         if (!cbuffer->reg.allocated)
 | |
|             continue;
 | |
| 
 | |
|         LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|         {
 | |
|             if (var->is_uniform && var->buffer == cbuffer && var->data_type->reg_size[HLSL_REGSET_NUMERIC])
 | |
|                 ++var_count;
 | |
|         }
 | |
| 
 | |
|         put_u32(&buffer, 0); /* name */
 | |
|         put_u32(&buffer, var_count);
 | |
|         put_u32(&buffer, 0); /* variable offset */
 | |
|         put_u32(&buffer, align(cbuffer->size, 4) * sizeof(float));
 | |
|         put_u32(&buffer, 0); /* FIXME: flags */
 | |
|         put_u32(&buffer, cbuffer->type == HLSL_BUFFER_CONSTANT ? D3D_CT_CBUFFER : D3D_CT_TBUFFER);
 | |
|     }
 | |
| 
 | |
|     i = 0;
 | |
|     LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
 | |
|     {
 | |
|         if (!cbuffer->reg.allocated)
 | |
|             continue;
 | |
| 
 | |
|         string_offset = put_string(&buffer, cbuffer->name);
 | |
|         set_u32(&buffer, cbuffers_offset + i++ * 6 * sizeof(uint32_t), string_offset);
 | |
|     }
 | |
| 
 | |
|     i = 0;
 | |
|     LIST_FOR_EACH_ENTRY(cbuffer, &ctx->buffers, struct hlsl_buffer, entry)
 | |
|     {
 | |
|         size_t vars_start = bytecode_align(&buffer);
 | |
| 
 | |
|         if (!cbuffer->reg.allocated)
 | |
|             continue;
 | |
| 
 | |
|         set_u32(&buffer, cbuffers_offset + (i++ * 6 + 2) * sizeof(uint32_t), vars_start);
 | |
| 
 | |
|         LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|         {
 | |
|             uint32_t flags = 0;
 | |
| 
 | |
|             if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC])
 | |
|                 continue;
 | |
| 
 | |
|             if (var->is_read)
 | |
|                 flags |= D3D_SVF_USED;
 | |
| 
 | |
|             put_u32(&buffer, 0); /* name */
 | |
|             put_u32(&buffer, var->buffer_offset * sizeof(float));
 | |
|             put_u32(&buffer, var->data_type->reg_size[HLSL_REGSET_NUMERIC] * sizeof(float));
 | |
|             put_u32(&buffer, flags);
 | |
|             put_u32(&buffer, 0); /* type */
 | |
|             put_u32(&buffer, 0); /* default value */
 | |
| 
 | |
|             if (profile->major_version >= 5)
 | |
|             {
 | |
|                 put_u32(&buffer, 0); /* texture start */
 | |
|                 put_u32(&buffer, 0); /* texture count */
 | |
|                 put_u32(&buffer, 0); /* sampler start */
 | |
|                 put_u32(&buffer, 0); /* sampler count */
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         j = 0;
 | |
|         LIST_FOR_EACH_ENTRY(var, &ctx->extern_vars, struct hlsl_ir_var, extern_entry)
 | |
|         {
 | |
|             const unsigned int var_size = (profile->major_version >= 5 ? 10 : 6);
 | |
|             size_t var_offset = vars_start + j * var_size * sizeof(uint32_t);
 | |
| 
 | |
|             if (!var->is_uniform || var->buffer != cbuffer || !var->data_type->reg_size[HLSL_REGSET_NUMERIC])
 | |
|                 continue;
 | |
| 
 | |
|             string_offset = put_string(&buffer, var->name);
 | |
|             set_u32(&buffer, var_offset, string_offset);
 | |
|             write_sm4_type(ctx, &buffer, var->data_type);
 | |
|             set_u32(&buffer, var_offset + 4 * sizeof(uint32_t), var->data_type->bytecode_offset);
 | |
| 
 | |
|             if (var->default_values)
 | |
|             {
 | |
|                 unsigned int reg_size = var->data_type->reg_size[HLSL_REGSET_NUMERIC];
 | |
|                 unsigned int comp_count = hlsl_type_component_count(var->data_type);
 | |
|                 unsigned int default_value_offset;
 | |
|                 unsigned int k;
 | |
| 
 | |
|                 default_value_offset = bytecode_reserve_bytes(&buffer, reg_size * sizeof(uint32_t));
 | |
|                 set_u32(&buffer, var_offset + 5 * sizeof(uint32_t), default_value_offset);
 | |
| 
 | |
|                 for (k = 0; k < comp_count; ++k)
 | |
|                 {
 | |
|                     struct hlsl_type *comp_type = hlsl_type_get_component_type(ctx, var->data_type, k);
 | |
|                     unsigned int comp_offset, comp_index;
 | |
|                     enum hlsl_regset regset;
 | |
| 
 | |
|                     if (comp_type->class == HLSL_CLASS_STRING)
 | |
|                     {
 | |
|                         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_TYPE,
 | |
|                                 "Cannot write string default value.");
 | |
|                         continue;
 | |
|                     }
 | |
| 
 | |
|                     comp_index = get_component_index_from_default_initializer_index(var->data_type, k);
 | |
|                     comp_offset = hlsl_type_get_component_offset(ctx, var->data_type, comp_index, ®set);
 | |
|                     if (regset == HLSL_REGSET_NUMERIC)
 | |
|                     {
 | |
|                         if (comp_type->e.numeric.type == HLSL_TYPE_DOUBLE)
 | |
|                             hlsl_fixme(ctx, &var->loc, "Write double default values.");
 | |
| 
 | |
|                         set_u32(&buffer, default_value_offset + comp_offset * sizeof(uint32_t),
 | |
|                                 var->default_values[k].number.u);
 | |
|                     }
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             ++j;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     creator_offset = put_string(&buffer, vkd3d_shader_get_version(NULL, NULL));
 | |
|     set_u32(&buffer, creator_position, creator_offset);
 | |
| 
 | |
|     sm4_free_extern_resources(extern_resources, extern_resources_count);
 | |
| 
 | |
|     if (buffer.status)
 | |
|     {
 | |
|         vkd3d_free(buffer.data);
 | |
|         ctx->result = buffer.status;
 | |
|         return;
 | |
|     }
 | |
|     rdef->code = buffer.data;
 | |
|     rdef->size = buffer.size;
 | |
| }
 | |
| 
 | |
| static bool loop_unrolling_generate_const_bool_store(struct hlsl_ctx *ctx, struct hlsl_ir_var *var,
 | |
|         bool val, struct hlsl_block *block, struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     struct hlsl_ir_node *const_node;
 | |
| 
 | |
|     if (!(const_node = hlsl_new_bool_constant(ctx, val, loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, const_node);
 | |
| 
 | |
|     hlsl_block_add_simple_store(ctx, block, var, const_node);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued);
 | |
| 
 | |
| static bool loop_unrolling_remove_jumps_visit(struct hlsl_ctx *ctx, struct hlsl_ir_node *node,
 | |
|         struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
 | |
| {
 | |
|     struct hlsl_ir_jump *jump;
 | |
|     struct hlsl_ir_var *var;
 | |
|     struct hlsl_block draft;
 | |
|     struct hlsl_ir_if *iff;
 | |
| 
 | |
|     if (node->type == HLSL_IR_IF)
 | |
|     {
 | |
|         iff = hlsl_ir_if(node);
 | |
|         if (loop_unrolling_remove_jumps_recurse(ctx, &iff->then_block, loop_broken, loop_continued))
 | |
|             return true;
 | |
|         if (loop_unrolling_remove_jumps_recurse(ctx, &iff->else_block, loop_broken, loop_continued))
 | |
|             return true;
 | |
|         return false;
 | |
|     }
 | |
| 
 | |
|     if (node->type == HLSL_IR_JUMP)
 | |
|     {
 | |
|         jump = hlsl_ir_jump(node);
 | |
|         if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE && jump->type != HLSL_IR_JUMP_BREAK)
 | |
|             return false;
 | |
| 
 | |
|         hlsl_block_init(&draft);
 | |
| 
 | |
|         if (jump->type == HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
 | |
|             var = loop_continued;
 | |
|         else
 | |
|             var = loop_broken;
 | |
| 
 | |
|         if (!loop_unrolling_generate_const_bool_store(ctx, var, true, &draft, &jump->node.loc))
 | |
|             return false;
 | |
| 
 | |
|         list_move_before(&jump->node.entry, &draft.instrs);
 | |
|         list_remove(&jump->node.entry);
 | |
|         hlsl_free_instr(&jump->node);
 | |
| 
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static struct hlsl_ir_if *loop_unrolling_generate_var_check(struct hlsl_ctx *ctx,
 | |
|         struct hlsl_block *dst, struct hlsl_ir_var *var, struct vkd3d_shader_location *loc)
 | |
| {
 | |
|     struct hlsl_ir_node *cond, *load, *iff;
 | |
|     struct hlsl_block then_block;
 | |
| 
 | |
|     hlsl_block_init(&then_block);
 | |
| 
 | |
|     load = hlsl_block_add_simple_load(ctx, dst, var, loc);
 | |
|     cond = hlsl_block_add_unary_expr(ctx, dst, HLSL_OP1_LOGIC_NOT, load, loc);
 | |
| 
 | |
|     if (!(iff = hlsl_new_if(ctx, cond, &then_block, NULL, loc)))
 | |
|         return NULL;
 | |
|     hlsl_block_add_instr(dst, iff);
 | |
| 
 | |
|     return hlsl_ir_if(iff);
 | |
| }
 | |
| 
 | |
| static bool loop_unrolling_remove_jumps_recurse(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
 | |
| {
 | |
|     struct hlsl_ir_node *node, *next;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY_SAFE(node, next, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         struct hlsl_ir_if *broken_check, *continued_check;
 | |
|         struct hlsl_block draft;
 | |
| 
 | |
|         if (!loop_unrolling_remove_jumps_visit(ctx, node, loop_broken, loop_continued))
 | |
|             continue;
 | |
| 
 | |
|         if (&next->entry == &block->instrs)
 | |
|             return true;
 | |
| 
 | |
|         hlsl_block_init(&draft);
 | |
| 
 | |
|         broken_check = loop_unrolling_generate_var_check(ctx, &draft, loop_broken, &next->loc);
 | |
|         continued_check = loop_unrolling_generate_var_check(ctx,
 | |
|                 &broken_check->then_block, loop_continued, &next->loc);
 | |
| 
 | |
|         list_move_before(&next->entry, &draft.instrs);
 | |
| 
 | |
|         list_move_slice_tail(&continued_check->then_block.instrs, &next->entry, list_tail(&block->instrs));
 | |
| 
 | |
|         return true;
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static void loop_unrolling_remove_jumps(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct hlsl_ir_var *loop_broken, struct hlsl_ir_var *loop_continued)
 | |
| {
 | |
|     while (loop_unrolling_remove_jumps_recurse(ctx, block, loop_broken, loop_continued));
 | |
| }
 | |
| 
 | |
| static unsigned int loop_unrolling_get_max_iterations(struct hlsl_ctx *ctx, struct hlsl_ir_loop *loop)
 | |
| {
 | |
|     /* Always use the explicit limit if it has been passed. */
 | |
|     if (loop->unroll_limit)
 | |
|         return loop->unroll_limit;
 | |
| 
 | |
|     /* All SMs will default to 1024 if [unroll] has been specified without an explicit limit. */
 | |
|     if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL)
 | |
|         return 1024;
 | |
| 
 | |
|     /* SM4 limits implicit unrolling to 254 iterations. */
 | |
|     if (hlsl_version_ge(ctx, 4, 0))
 | |
|         return 254;
 | |
| 
 | |
|     /* SM<3 implicitly unrolls up to 1024 iterations. */
 | |
|     return 1024;
 | |
| }
 | |
| 
 | |
| static void loop_unrolling_simplify(struct hlsl_ctx *ctx, struct hlsl_block *block,
 | |
|         struct copy_propagation_state *state, unsigned int *index)
 | |
| {
 | |
|     size_t scopes_depth = state->scope_count - 1;
 | |
|     unsigned int current_index;
 | |
|     bool progress;
 | |
| 
 | |
|     do
 | |
|     {
 | |
|         state->stopped = false;
 | |
|         for (size_t i = state->scope_count; scopes_depth < i; --i)
 | |
|             copy_propagation_pop_scope(state);
 | |
|         copy_propagation_push_scope(state, ctx);
 | |
| 
 | |
|         progress = simplify_exprs(ctx, block);
 | |
| 
 | |
|         current_index = index_instructions(block, *index);
 | |
|         progress |= copy_propagation_transform_block(ctx, block, state);
 | |
| 
 | |
|         progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, block, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, block, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, remove_trivial_conditional_branches, block, NULL);
 | |
|     } while (progress);
 | |
| 
 | |
|     *index = current_index;
 | |
| }
 | |
| 
 | |
| static bool loop_unrolling_check_val(struct copy_propagation_state *state, struct hlsl_ir_var *var)
 | |
| {
 | |
|     struct copy_propagation_value *v;
 | |
| 
 | |
|     if (!(v = copy_propagation_get_value(state, var, 0, UINT_MAX))
 | |
|             || v->node->type != HLSL_IR_CONSTANT)
 | |
|         return false;
 | |
| 
 | |
|     return hlsl_ir_constant(v->node)->value.u[0].u;
 | |
| }
 | |
| 
 | |
| static bool loop_unrolling_unroll_loop(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *loop)
 | |
| {
 | |
|     struct hlsl_block draft, tmp_dst, loop_body;
 | |
|     struct hlsl_ir_var *broken, *continued;
 | |
|     unsigned int max_iterations, i, index;
 | |
|     struct copy_propagation_state state;
 | |
|     struct hlsl_ir_if *target_if;
 | |
| 
 | |
|     if (!(broken = hlsl_new_synthetic_var(ctx, "broken",
 | |
|             hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
 | |
|         goto fail;
 | |
| 
 | |
|     if (!(continued = hlsl_new_synthetic_var(ctx, "continued",
 | |
|             hlsl_get_scalar_type(ctx, HLSL_TYPE_BOOL), &loop->node.loc)))
 | |
|         goto fail;
 | |
| 
 | |
|     hlsl_block_init(&draft);
 | |
|     hlsl_block_init(&tmp_dst);
 | |
| 
 | |
|     max_iterations = loop_unrolling_get_max_iterations(ctx, loop);
 | |
|     copy_propagation_state_init(&state, ctx);
 | |
|     index = 2;
 | |
|     state.stop = &loop->node;
 | |
|     loop_unrolling_simplify(ctx, block, &state, &index);
 | |
|     state.stopped = false;
 | |
|     index = loop->node.index;
 | |
| 
 | |
|     if (!loop_unrolling_generate_const_bool_store(ctx, broken, false, &tmp_dst, &loop->node.loc))
 | |
|         goto fail;
 | |
|     hlsl_block_add_block(&draft, &tmp_dst);
 | |
| 
 | |
|     if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
 | |
|         goto fail;
 | |
|     hlsl_block_add_block(&draft, &tmp_dst);
 | |
| 
 | |
|     if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
 | |
|         goto fail;
 | |
|     state.stop = LIST_ENTRY(list_head(&tmp_dst.instrs), struct hlsl_ir_node, entry);
 | |
|     hlsl_block_add_block(&draft, &tmp_dst);
 | |
| 
 | |
|     copy_propagation_push_scope(&state, ctx);
 | |
|     loop_unrolling_simplify(ctx, &draft, &state, &index);
 | |
| 
 | |
|     /* As an optimization, we only remove jumps from the loop's body once. */
 | |
|     if (!hlsl_clone_block(ctx, &loop_body, &loop->body))
 | |
|         goto fail;
 | |
|     loop_unrolling_remove_jumps(ctx, &loop_body, broken, continued);
 | |
| 
 | |
|     for (i = 0; i < max_iterations; ++i)
 | |
|     {
 | |
|         copy_propagation_push_scope(&state, ctx);
 | |
| 
 | |
|         if (!loop_unrolling_generate_const_bool_store(ctx, continued, false, &tmp_dst, &loop->node.loc))
 | |
|             goto fail;
 | |
|         hlsl_block_add_block(&target_if->then_block, &tmp_dst);
 | |
| 
 | |
|         if (!hlsl_clone_block(ctx, &tmp_dst, &loop_body))
 | |
|             goto fail;
 | |
|         hlsl_block_add_block(&target_if->then_block, &tmp_dst);
 | |
| 
 | |
|         loop_unrolling_simplify(ctx, &target_if->then_block, &state, &index);
 | |
| 
 | |
|         if (loop_unrolling_check_val(&state, broken))
 | |
|             break;
 | |
| 
 | |
|         if (!(target_if = loop_unrolling_generate_var_check(ctx, &tmp_dst, broken, &loop->node.loc)))
 | |
|             goto fail;
 | |
|         hlsl_block_add_block(&draft, &tmp_dst);
 | |
| 
 | |
|         if (!hlsl_clone_block(ctx, &tmp_dst, &loop->iter))
 | |
|             goto fail;
 | |
|         hlsl_block_add_block(&target_if->then_block, &tmp_dst);
 | |
|    }
 | |
| 
 | |
|     /* Native will not emit an error if max_iterations has been reached with an
 | |
|      * explicit limit. It also will not insert a loop if there are iterations left
 | |
|      * i.e [unroll(4)] for (i = 0; i < 8; ++i)) */
 | |
|     if (!loop->unroll_limit && i == max_iterations)
 | |
|     {
 | |
|         if (loop->unroll_type == HLSL_LOOP_FORCE_UNROLL)
 | |
|             hlsl_error(ctx, &loop->node.loc, VKD3D_SHADER_ERROR_HLSL_FAILED_FORCED_UNROLL,
 | |
|                 "Unable to unroll loop, maximum iterations reached (%u).", max_iterations);
 | |
|         goto fail;
 | |
|     }
 | |
| 
 | |
|     hlsl_block_cleanup(&loop_body);
 | |
|     copy_propagation_state_destroy(&state);
 | |
| 
 | |
|     list_move_before(&loop->node.entry, &draft.instrs);
 | |
|     hlsl_block_cleanup(&draft);
 | |
|     list_remove(&loop->node.entry);
 | |
|     hlsl_free_instr(&loop->node);
 | |
| 
 | |
|     return true;
 | |
| 
 | |
| fail:
 | |
|     hlsl_block_cleanup(&loop_body);
 | |
|     copy_propagation_state_destroy(&state);
 | |
|     hlsl_block_cleanup(&draft);
 | |
| 
 | |
|     return false;
 | |
| }
 | |
| 
 | |
| static bool unroll_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context)
 | |
| {
 | |
|     struct hlsl_block *program = context;
 | |
|     struct hlsl_ir_loop *loop;
 | |
| 
 | |
|     if (node->type != HLSL_IR_LOOP)
 | |
|         return true;
 | |
| 
 | |
|     loop = hlsl_ir_loop(node);
 | |
| 
 | |
|     if (loop->unroll_type != HLSL_LOOP_UNROLL && loop->unroll_type != HLSL_LOOP_FORCE_UNROLL)
 | |
|         return true;
 | |
| 
 | |
|     if (!loop_unrolling_unroll_loop(ctx, program, loop))
 | |
|         loop->unroll_type = HLSL_LOOP_FORCE_LOOP;
 | |
| 
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| /* We could handle this at parse time. However, loop unrolling often needs to
 | |
|  * know the value of variables modified in the "iter" block. It is possible to
 | |
|  * detect that all exit paths of a loop body modify such variables in the same
 | |
|  * way, but difficult, and d3dcompiler does not attempt to do so.
 | |
|  * In fact, d3dcompiler is capable of unrolling the following loop:
 | |
|  * for (int i = 0; i < 10; ++i)
 | |
|  * {
 | |
|  *     if (some_uniform > 4)
 | |
|  *         continue;
 | |
|  * }
 | |
|  * but cannot unroll the same loop with "++i" moved to each exit path:
 | |
|  * for (int i = 0; i < 10;)
 | |
|  * {
 | |
|  *     if (some_uniform > 4)
 | |
|  *     {
 | |
|  *         ++i;
 | |
|  *         continue;
 | |
|  *     }
 | |
|  *     ++i;
 | |
|  * }
 | |
|  */
 | |
| static bool resolve_loops(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, void *context)
 | |
| {
 | |
|     struct hlsl_ir_loop *loop;
 | |
| 
 | |
|     if (node->type != HLSL_IR_LOOP)
 | |
|         return true;
 | |
| 
 | |
|     loop = hlsl_ir_loop(node);
 | |
| 
 | |
|     hlsl_block_add_block(&loop->body, &loop->iter);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void resolve_continues(struct hlsl_ctx *ctx, struct hlsl_block *block, struct hlsl_ir_loop *last_loop)
 | |
| {
 | |
|     struct hlsl_ir_node *node;
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(node, &block->instrs, struct hlsl_ir_node, entry)
 | |
|     {
 | |
|         switch (node->type)
 | |
|         {
 | |
|             case HLSL_IR_LOOP:
 | |
|             {
 | |
|                 struct hlsl_ir_loop *loop = hlsl_ir_loop(node);
 | |
| 
 | |
|                 resolve_continues(ctx, &loop->body, loop);
 | |
|                 break;
 | |
|             }
 | |
|             case HLSL_IR_IF:
 | |
|             {
 | |
|                 struct hlsl_ir_if *iff = hlsl_ir_if(node);
 | |
|                 resolve_continues(ctx, &iff->then_block, last_loop);
 | |
|                 resolve_continues(ctx, &iff->else_block, last_loop);
 | |
|                 break;
 | |
|             }
 | |
|             case HLSL_IR_SWITCH:
 | |
|             {
 | |
|                 struct hlsl_ir_switch *s = hlsl_ir_switch(node);
 | |
|                 struct hlsl_ir_switch_case *c;
 | |
| 
 | |
|                 LIST_FOR_EACH_ENTRY(c, &s->cases, struct hlsl_ir_switch_case, entry)
 | |
|                 {
 | |
|                     resolve_continues(ctx, &c->body, last_loop);
 | |
|                 }
 | |
| 
 | |
|                 break;
 | |
|             }
 | |
|             case HLSL_IR_JUMP:
 | |
|             {
 | |
|                 struct hlsl_ir_jump *jump = hlsl_ir_jump(node);
 | |
| 
 | |
|                 if (jump->type != HLSL_IR_JUMP_UNRESOLVED_CONTINUE)
 | |
|                     break;
 | |
| 
 | |
|                 if (last_loop->type == HLSL_LOOP_FOR)
 | |
|                 {
 | |
|                     struct hlsl_block draft;
 | |
| 
 | |
|                     if (!hlsl_clone_block(ctx, &draft, &last_loop->iter))
 | |
|                         return;
 | |
| 
 | |
|                     list_move_before(&node->entry, &draft.instrs);
 | |
|                     hlsl_block_cleanup(&draft);
 | |
|                 }
 | |
| 
 | |
|                 jump->type = HLSL_IR_JUMP_CONTINUE;
 | |
|                 break;
 | |
|             }
 | |
|             default:
 | |
|                 break;
 | |
|         }
 | |
|     }
 | |
| }
 | |
| 
 | |
| static void loop_unrolling_execute(struct hlsl_ctx *ctx, struct hlsl_block *block)
 | |
| {
 | |
|     bool progress;
 | |
| 
 | |
|     /* These are required by copy propagation, which in turn is required for
 | |
|      * unrolling. */
 | |
|     do
 | |
|     {
 | |
|         progress = hlsl_transform_ir(ctx, split_array_copies, block, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, split_struct_copies, block, NULL);
 | |
|     } while (progress);
 | |
|     hlsl_transform_ir(ctx, split_matrix_copies, block, NULL);
 | |
| 
 | |
|     hlsl_transform_ir(ctx, unroll_loops, block, block);
 | |
|     resolve_continues(ctx, block, NULL);
 | |
|     hlsl_transform_ir(ctx, resolve_loops, block, NULL);
 | |
| }
 | |
| 
 | |
| static bool lower_f16tof32(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_function_decl *func;
 | |
|     struct hlsl_ir_node *call, *rhs;
 | |
|     unsigned int component_count;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     struct hlsl_ir_var *lhs;
 | |
|     char *body;
 | |
| 
 | |
|     static const char template[] =
 | |
|     "typedef uint%u uintX;\n"
 | |
|     "float%u soft_f16tof32(uintX x)\n"
 | |
|     "{\n"
 | |
|     "    uintX mantissa = x & 0x3ff;\n"
 | |
|     "    uintX high2 = mantissa >> 8;\n"
 | |
|     "    uintX high2_check = high2 ? high2 : mantissa;\n"
 | |
|     "    uintX high6 = high2_check >> 4;\n"
 | |
|     "    uintX high6_check = high6 ? high6 : high2_check;\n"
 | |
|     "\n"
 | |
|     "    uintX high8 = high6_check >> 2;\n"
 | |
|     "    uintX high8_check = (high8 ? high8 : high6_check) >> 1;\n"
 | |
|     "    uintX shift = high6 ? (high2 ? 12 : 4) : (high2 ? 8 : 0);\n"
 | |
|     "    shift = high8 ? shift + 2 : shift;\n"
 | |
|     "    shift = high8_check ? shift + 1 : shift;\n"
 | |
|     "    shift = -shift + 10;\n"
 | |
|     "    shift = mantissa ? shift : 11;\n"
 | |
|     "    uintX subnormal_mantissa = ((mantissa << shift) << 23) & 0x7fe000;\n"
 | |
|     "    uintX subnormal_exp = -(shift << 23) + 0x38800000;\n"
 | |
|     "    uintX subnormal_val = subnormal_exp + subnormal_mantissa;\n"
 | |
|     "    uintX subnormal_or_zero = mantissa ? subnormal_val : 0;\n"
 | |
|     "\n"
 | |
|     "    uintX exponent = (((x >> 10) << 23) & 0xf800000) + 0x38000000;\n"
 | |
|     "\n"
 | |
|     "    uintX low_3 = (x << 13) & 0x7fe000;\n"
 | |
|     "    uintX normalized_val = exponent + low_3;\n"
 | |
|     "    uintX inf_nan_val = low_3 + 0x7f800000;\n"
 | |
|     "\n"
 | |
|     "    uintX exp_mask = 0x7c00;\n"
 | |
|     "    uintX is_inf_nan = (x & exp_mask) == exp_mask;\n"
 | |
|     "    uintX is_normalized = x & exp_mask;\n"
 | |
|     "\n"
 | |
|     "    uintX check = is_inf_nan ? inf_nan_val : normalized_val;\n"
 | |
|     "    uintX exp_mantissa = (is_normalized ? check : subnormal_or_zero) & 0x7fffe000;\n"
 | |
|     "    uintX sign_bit = (x << 16) & 0x80000000;\n"
 | |
|     "\n"
 | |
|     "    return asfloat(exp_mantissa + sign_bit);\n"
 | |
|     "}\n";
 | |
| 
 | |
| 
 | |
|     if (node->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     expr = hlsl_ir_expr(node);
 | |
| 
 | |
|     if (expr->op != HLSL_OP1_F16TOF32)
 | |
|         return false;
 | |
| 
 | |
|     rhs = expr->operands[0].node;
 | |
|     component_count = hlsl_type_component_count(rhs->data_type);
 | |
| 
 | |
|     if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
 | |
|         return false;
 | |
| 
 | |
|     if (!(func = hlsl_compile_internal_function(ctx, "soft_f16tof32", body)))
 | |
|         return false;
 | |
| 
 | |
|     lhs = func->parameters.vars[0];
 | |
|     hlsl_block_add_simple_store(ctx, block, lhs, rhs);
 | |
| 
 | |
|     if (!(call = hlsl_new_call(ctx, func, &node->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, call);
 | |
| 
 | |
|     hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_f32tof16(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_function_decl *func;
 | |
|     struct hlsl_ir_node *call, *rhs;
 | |
|     unsigned int component_count;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     struct hlsl_ir_var *lhs;
 | |
|     char *body;
 | |
| 
 | |
|     static const char template[] =
 | |
|     "typedef uint%u uintX;\n"
 | |
|     "uintX soft_f32tof16(float%u x)\n"
 | |
|     "{\n"
 | |
|     "    uintX v = asuint(x);\n"
 | |
|     "    uintX v_abs = v & 0x7fffffff;\n"
 | |
|     "    uintX sign_bit = (v >> 16) & 0x8000;\n"
 | |
|     "    uintX exp = (v >> 23) & 0xff;\n"
 | |
|     "    uintX mantissa = v & 0x7fffff;\n"
 | |
|     "    uintX nan16;\n"
 | |
|     "    uintX nan = (v & 0x7f800000) == 0x7f800000;\n"
 | |
|     "    uintX val;\n"
 | |
|     "\n"
 | |
|     "    val = 113 - exp;\n"
 | |
|     "    val = (mantissa + 0x800000) >> val;\n"
 | |
|     "    val >>= 13;\n"
 | |
|     "\n"
 | |
|     "    val = (exp - 127) < -38 ? 0 : val;\n"
 | |
|     "\n"
 | |
|     "    val = v_abs < 0x38800000 ? val : (v_abs + 0xc8000000) >> 13;\n"
 | |
|     "    val = v_abs > 0x47ffe000 ? 0x7bff : val;\n"
 | |
|     "\n"
 | |
|     "    nan16 = (((v >> 13) | (v >> 3) | v) & 0x3ff) + 0x7c00;\n"
 | |
|     "    val = nan ? nan16 : val;\n"
 | |
|     "\n"
 | |
|     "    return (val & 0x7fff) + sign_bit;\n"
 | |
|     "}\n";
 | |
| 
 | |
|     if (node->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     expr = hlsl_ir_expr(node);
 | |
| 
 | |
|     if (expr->op != HLSL_OP1_F32TOF16)
 | |
|         return false;
 | |
| 
 | |
|     rhs = expr->operands[0].node;
 | |
|     component_count = hlsl_type_component_count(rhs->data_type);
 | |
| 
 | |
|     if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
 | |
|         return false;
 | |
| 
 | |
|     if (!(func = hlsl_compile_internal_function(ctx, "soft_f32tof16", body)))
 | |
|         return false;
 | |
| 
 | |
|     lhs = func->parameters.vars[0];
 | |
|     hlsl_block_add_simple_store(ctx, block, lhs, rhs);
 | |
| 
 | |
|     if (!(call = hlsl_new_call(ctx, func, &node->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, call);
 | |
| 
 | |
|     hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static bool lower_isinf(struct hlsl_ctx *ctx, struct hlsl_ir_node *node, struct hlsl_block *block)
 | |
| {
 | |
|     struct hlsl_ir_function_decl *func;
 | |
|     struct hlsl_ir_node *call, *rhs;
 | |
|     unsigned int component_count;
 | |
|     struct hlsl_ir_expr *expr;
 | |
|     const char *template;
 | |
|     char *body;
 | |
| 
 | |
|     static const char template_sm2[] =
 | |
|         "typedef bool%u boolX;\n"
 | |
|         "typedef float%u floatX;\n"
 | |
|         "boolX isinf(floatX x)\n"
 | |
|         "{\n"
 | |
|         "    floatX v = 1 / x;\n"
 | |
|         "    v = v * v;\n"
 | |
|         "    return v <= 0;\n"
 | |
|         "}\n";
 | |
| 
 | |
|     static const char template_sm3[] =
 | |
|         "typedef bool%u boolX;\n"
 | |
|         "typedef float%u floatX;\n"
 | |
|         "boolX isinf(floatX x)\n"
 | |
|         "{\n"
 | |
|         "    floatX v = 1 / x;\n"
 | |
|         "    return v <= 0;\n"
 | |
|         "}\n";
 | |
| 
 | |
|     static const char template_sm4[] =
 | |
|         "typedef bool%u boolX;\n"
 | |
|         "typedef float%u floatX;\n"
 | |
|         "boolX isinf(floatX x)\n"
 | |
|         "{\n"
 | |
|         "    return (asuint(x) & 0x7fffffff) == 0x7f800000;\n"
 | |
|         "}\n";
 | |
| 
 | |
|     static const char template_int[] =
 | |
|         "typedef bool%u boolX;\n"
 | |
|         "typedef float%u floatX;\n"
 | |
|         "boolX isinf(floatX x)\n"
 | |
|         "{\n"
 | |
|         "    return false;\n"
 | |
|         "}";
 | |
| 
 | |
|     if (node->type != HLSL_IR_EXPR)
 | |
|         return false;
 | |
| 
 | |
|     expr = hlsl_ir_expr(node);
 | |
| 
 | |
|     if (expr->op != HLSL_OP1_ISINF)
 | |
|         return false;
 | |
| 
 | |
|     rhs = expr->operands[0].node;
 | |
| 
 | |
|     if (hlsl_version_lt(ctx, 3, 0))
 | |
|         template = template_sm2;
 | |
|     else if (hlsl_version_lt(ctx, 4, 0))
 | |
|         template = template_sm3;
 | |
|     else if (hlsl_type_is_integer(rhs->data_type))
 | |
|         template = template_int;
 | |
|     else
 | |
|         template = template_sm4;
 | |
| 
 | |
|     component_count = hlsl_type_component_count(rhs->data_type);
 | |
|     if (!(body = hlsl_sprintf_alloc(ctx, template, component_count, component_count)))
 | |
|         return false;
 | |
| 
 | |
|     if (!(func = hlsl_compile_internal_function(ctx, "isinf", body)))
 | |
|         return false;
 | |
| 
 | |
|     hlsl_block_add_simple_store(ctx, block, func->parameters.vars[0], rhs);
 | |
| 
 | |
|     if (!(call = hlsl_new_call(ctx, func, &node->loc)))
 | |
|         return false;
 | |
|     hlsl_block_add_instr(block, call);
 | |
| 
 | |
|     hlsl_block_add_simple_load(ctx, block, func->return_var, &node->loc);
 | |
|     return true;
 | |
| }
 | |
| 
 | |
| static void process_entry_function(struct hlsl_ctx *ctx,
 | |
|         const struct hlsl_block *global_uniform_block, struct hlsl_ir_function_decl *entry_func)
 | |
| {
 | |
|     const struct hlsl_ir_var *input_patch = NULL, *output_patch = NULL;
 | |
|     const struct hlsl_profile_info *profile = ctx->profile;
 | |
|     struct hlsl_block static_initializers, global_uniforms;
 | |
|     struct hlsl_block *const body = &entry_func->body;
 | |
|     struct recursive_call_ctx recursive_call_ctx;
 | |
|     struct stream_append_ctx stream_append_ctx;
 | |
|     uint32_t output_reg_count;
 | |
|     struct hlsl_ir_var *var;
 | |
|     unsigned int i;
 | |
|     bool progress;
 | |
| 
 | |
|     ctx->is_patch_constant_func = entry_func == ctx->patch_constant_func;
 | |
| 
 | |
|     if (!hlsl_clone_block(ctx, &static_initializers, &ctx->static_initializers))
 | |
|         return;
 | |
|     list_move_head(&body->instrs, &static_initializers.instrs);
 | |
| 
 | |
|     if (!hlsl_clone_block(ctx, &global_uniforms, global_uniform_block))
 | |
|         return;
 | |
|     list_move_head(&body->instrs, &global_uniforms.instrs);
 | |
| 
 | |
|     memset(&recursive_call_ctx, 0, sizeof(recursive_call_ctx));
 | |
|     hlsl_transform_ir(ctx, find_recursive_calls, body, &recursive_call_ctx);
 | |
|     vkd3d_free(recursive_call_ctx.backtrace);
 | |
| 
 | |
|     /* Avoid going into an infinite loop when processing call instructions.
 | |
|      * lower_return() recurses into inferior calls. */
 | |
|     if (ctx->result)
 | |
|         return;
 | |
| 
 | |
|     if (hlsl_version_ge(ctx, 4, 0) && hlsl_version_lt(ctx, 5, 0))
 | |
|     {
 | |
|         lower_ir(ctx, lower_f16tof32, body);
 | |
|         lower_ir(ctx, lower_f32tof16, body);
 | |
|     }
 | |
| 
 | |
|     lower_ir(ctx, lower_isinf, body);
 | |
| 
 | |
|     lower_return(ctx, entry_func, body, false);
 | |
| 
 | |
|     while (hlsl_transform_ir(ctx, lower_calls, body, NULL));
 | |
| 
 | |
|     lower_ir(ctx, lower_complex_casts, body);
 | |
|     lower_ir(ctx, lower_matrix_swizzles, body);
 | |
|     lower_ir(ctx, lower_index_loads, body);
 | |
| 
 | |
|     for (i = 0; i < entry_func->parameters.count; ++i)
 | |
|     {
 | |
|         var = entry_func->parameters.vars[i];
 | |
| 
 | |
|         if (hlsl_type_is_resource(var->data_type))
 | |
|         {
 | |
|             prepend_uniform_copy(ctx, body, var);
 | |
|         }
 | |
|         else if ((var->storage_modifiers & HLSL_STORAGE_UNIFORM))
 | |
|         {
 | |
|             if (ctx->profile->type == VKD3D_SHADER_TYPE_HULL && ctx->is_patch_constant_func)
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
 | |
|                         "Patch constant function parameter \"%s\" cannot be uniform.", var->name);
 | |
|             else
 | |
|                 prepend_uniform_copy(ctx, body, var);
 | |
|         }
 | |
|         else if (hlsl_type_is_primitive_array(var->data_type))
 | |
|         {
 | |
|             if (var->storage_modifiers & HLSL_STORAGE_OUT)
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
 | |
|                         "Input primitive parameter \"%s\" is declared as \"out\".", var->name);
 | |
| 
 | |
|             if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|             {
 | |
|                 enum hlsl_array_type array_type = var->data_type->e.array.array_type;
 | |
| 
 | |
|                 if (array_type == HLSL_ARRAY_PATCH_INPUT)
 | |
|                 {
 | |
|                     if (input_patch)
 | |
|                     {
 | |
|                         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH,
 | |
|                                 "Found multiple InputPatch parameters.");
 | |
|                         hlsl_note(ctx, &input_patch->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                                 "The InputPatch parameter was previously declared here.");
 | |
|                         continue;
 | |
|                     }
 | |
|                     input_patch = var;
 | |
|                 }
 | |
|                 else if (array_type == HLSL_ARRAY_PATCH_OUTPUT)
 | |
|                 {
 | |
|                     if (output_patch)
 | |
|                     {
 | |
|                         hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_DUPLICATE_PATCH,
 | |
|                                 "Found multiple OutputPatch parameters.");
 | |
|                         hlsl_note(ctx, &output_patch->loc, VKD3D_SHADER_LOG_ERROR,
 | |
|                                 "The OutputPatch parameter was previously declared here.");
 | |
|                         continue;
 | |
|                     }
 | |
|                     output_patch = var;
 | |
|                 }
 | |
|             }
 | |
| 
 | |
|             validate_and_record_prim_type(ctx, var);
 | |
|             prepend_input_var_copy(ctx, entry_func, var);
 | |
|         }
 | |
|         else if (var->data_type->reg_size[HLSL_REGSET_STREAM_OUTPUTS])
 | |
|         {
 | |
|             if (profile->type != VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|             {
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
 | |
|                         "Stream output parameters can only be used in geometry shaders.");
 | |
|                 continue;
 | |
|             }
 | |
| 
 | |
|             if (!(var->storage_modifiers & HLSL_STORAGE_IN) || !(var->storage_modifiers & HLSL_STORAGE_OUT))
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
 | |
|                         "Stream output parameter \"%s\" must be declared as \"inout\".", var->name);
 | |
| 
 | |
|             prepend_uniform_copy(ctx, body, var);
 | |
|         }
 | |
|         else
 | |
|         {
 | |
|             if (hlsl_get_multiarray_element_type(var->data_type)->class != HLSL_CLASS_STRUCT
 | |
|                     && !var->semantic.name)
 | |
|             {
 | |
|                 hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
 | |
|                         "Parameter \"%s\" is missing a semantic.", var->name);
 | |
|                 var->semantic.reported_missing = true;
 | |
|             }
 | |
| 
 | |
|             if (var->storage_modifiers & HLSL_STORAGE_IN)
 | |
|             {
 | |
|                 if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && !var->semantic.name)
 | |
|                 {
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE,
 | |
|                             "Input parameter \"%s\" is missing a primitive type.", var->name);
 | |
|                     continue;
 | |
|                 }
 | |
| 
 | |
|                 prepend_input_var_copy(ctx, entry_func, var);
 | |
|             }
 | |
|             if (var->storage_modifiers & HLSL_STORAGE_OUT)
 | |
|             {
 | |
|                 if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func)
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
 | |
|                             "Output parameters are not supported in hull shader control point functions.");
 | |
|                 else if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|                     hlsl_error(ctx, &var->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_MODIFIER,
 | |
|                             "Output parameters are not allowed in geometry shaders.");
 | |
|                 else
 | |
|                     append_output_var_copy(ctx, entry_func, var);
 | |
|             }
 | |
|         }
 | |
|     }
 | |
|     if (entry_func->return_var)
 | |
|     {
 | |
|         if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|             hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INCOMPATIBLE_PROFILE,
 | |
|                     "Geometry shaders cannot return values.");
 | |
|         else if (entry_func->return_var->data_type->class != HLSL_CLASS_STRUCT
 | |
|                 && !entry_func->return_var->semantic.name)
 | |
|             hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_SEMANTIC,
 | |
|                     "Entry point \"%s\" is missing a return value semantic.", entry_func->func->name);
 | |
| 
 | |
|         append_output_var_copy(ctx, entry_func, entry_func->return_var);
 | |
| 
 | |
|         if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func)
 | |
|             ctx->output_control_point_type = entry_func->return_var->data_type;
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         if (profile->type == VKD3D_SHADER_TYPE_HULL && !ctx->is_patch_constant_func)
 | |
|         {
 | |
|             if (!ctx->input_control_point_type)
 | |
|             {
 | |
|                 hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_INPUT_PATCH,
 | |
|                         "Pass-through control point function \"%s\" is missing an InputPatch parameter.",
 | |
|                         entry_func->func->name);
 | |
|             }
 | |
|             else if (ctx->output_control_point_count
 | |
|                     && ctx->output_control_point_count != ctx->input_control_point_count)
 | |
|             {
 | |
|                 hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_INVALID_CONTROL_POINT_COUNT,
 | |
|                         "Output control point count %u does not match the input control point count %u.",
 | |
|                         ctx->output_control_point_count, ctx->input_control_point_count);
 | |
|             }
 | |
|             else
 | |
|             {
 | |
|                 ctx->output_control_point_type = ctx->input_control_point_type;
 | |
|                 ctx->output_control_point_count = ctx->input_control_point_count;
 | |
|             }
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && ctx->input_primitive_type == VKD3D_PT_UNDEFINED)
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_PRIMITIVE_TYPE,
 | |
|                 "Entry point \"%s\" is missing an input primitive parameter.", entry_func->func->name);
 | |
| 
 | |
|     if (hlsl_version_ge(ctx, 4, 0))
 | |
|     {
 | |
|         hlsl_transform_ir(ctx, lower_discard_neg, body, NULL);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         hlsl_transform_ir(ctx, lower_discard_nz, body, NULL);
 | |
|         hlsl_transform_ir(ctx, lower_resource_load_bias, body, NULL);
 | |
|     }
 | |
| 
 | |
|     compute_liveness(ctx, entry_func);
 | |
|     transform_derefs(ctx, divert_written_uniform_derefs_to_temp, &entry_func->body);
 | |
| 
 | |
|     loop_unrolling_execute(ctx, body);
 | |
|     hlsl_run_const_passes(ctx, body);
 | |
| 
 | |
|     remove_unreachable_code(ctx, body);
 | |
|     hlsl_transform_ir(ctx, normalize_switch_cases, body, NULL);
 | |
| 
 | |
|     lower_ir(ctx, lower_nonconstant_vector_derefs, body);
 | |
|     lower_ir(ctx, lower_casts_to_bool, body);
 | |
|     lower_ir(ctx, lower_int_dot, body);
 | |
| 
 | |
|     if (hlsl_version_lt(ctx, 4, 0))
 | |
|         hlsl_transform_ir(ctx, lower_separate_samples, body, NULL);
 | |
| 
 | |
|     hlsl_transform_ir(ctx, validate_dereferences, body, NULL);
 | |
| 
 | |
|     do
 | |
|     {
 | |
|         progress = vectorize_exprs(ctx, body);
 | |
|         compute_liveness(ctx, entry_func);
 | |
|         progress |= hlsl_transform_ir(ctx, dce, body, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, fold_swizzle_chains, body, NULL);
 | |
|         progress |= hlsl_transform_ir(ctx, remove_trivial_swizzles, body, NULL);
 | |
|         progress |= vectorize_stores(ctx, body);
 | |
|     } while (progress);
 | |
| 
 | |
|     hlsl_transform_ir(ctx, track_object_components_sampler_dim, body, NULL);
 | |
| 
 | |
|     if (hlsl_version_ge(ctx, 4, 0))
 | |
|         hlsl_transform_ir(ctx, lower_combined_samples, body, NULL);
 | |
| 
 | |
|     do
 | |
|         compute_liveness(ctx, entry_func);
 | |
|     while (hlsl_transform_ir(ctx, dce, body, NULL));
 | |
| 
 | |
|     hlsl_transform_ir(ctx, track_components_usage, body, NULL);
 | |
|     if (hlsl_version_lt(ctx, 4, 0))
 | |
|         sort_synthetic_combined_samplers_first(ctx);
 | |
|     else
 | |
|         sort_synthetic_separated_samplers_first(ctx);
 | |
| 
 | |
|     if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|     {
 | |
|         allocate_stream_outputs(ctx);
 | |
|         validate_and_record_stream_outputs(ctx);
 | |
| 
 | |
|         memset(&stream_append_ctx, 0, sizeof(stream_append_ctx));
 | |
|         stream_append_ctx.func = entry_func;
 | |
|         hlsl_transform_ir(ctx, lower_stream_appends, body, &stream_append_ctx);
 | |
|     }
 | |
| 
 | |
|     if (profile->major_version < 4)
 | |
|     {
 | |
|         while (lower_ir(ctx, lower_nonconstant_array_loads, body));
 | |
| 
 | |
|         lower_ir(ctx, lower_ternary, body);
 | |
|         lower_ir(ctx, lower_int_modulus_sm1, body);
 | |
|         lower_ir(ctx, lower_division, body);
 | |
|         /* Constants casted to float must be folded, and new casts to bool also need to be lowered. */
 | |
|         hlsl_transform_ir(ctx, hlsl_fold_constant_exprs, body, NULL);
 | |
|         lower_ir(ctx, lower_casts_to_bool, body);
 | |
| 
 | |
|         lower_ir(ctx, lower_casts_to_int, body);
 | |
|         lower_ir(ctx, lower_trunc, body);
 | |
|         lower_ir(ctx, lower_sqrt, body);
 | |
|         lower_ir(ctx, lower_dot, body);
 | |
|         lower_ir(ctx, lower_round, body);
 | |
|         lower_ir(ctx, lower_ceil, body);
 | |
|         lower_ir(ctx, lower_floor, body);
 | |
|         lower_ir(ctx, lower_trig, body);
 | |
|         lower_ir(ctx, lower_comparison_operators, body);
 | |
|         lower_ir(ctx, lower_logic_not, body);
 | |
|         if (ctx->profile->type == VKD3D_SHADER_TYPE_PIXEL)
 | |
|             lower_ir(ctx, lower_slt, body);
 | |
|         else
 | |
|             lower_ir(ctx, lower_cmp, body);
 | |
|     }
 | |
| 
 | |
|     if (profile->major_version < 2)
 | |
|     {
 | |
|         lower_ir(ctx, lower_abs, body);
 | |
|     }
 | |
| 
 | |
|     lower_ir(ctx, validate_nonconstant_vector_store_derefs, body);
 | |
| 
 | |
|     hlsl_run_folding_passes(ctx, body);
 | |
| 
 | |
|     do
 | |
|         compute_liveness(ctx, entry_func);
 | |
|     while (hlsl_transform_ir(ctx, dce, body, NULL));
 | |
| 
 | |
|     /* TODO: move forward, remove when no longer needed */
 | |
|     transform_derefs(ctx, replace_deref_path_with_offset, body);
 | |
|     simplify_exprs(ctx, body);
 | |
|     transform_derefs(ctx, clean_constant_deref_offset_srcs, body);
 | |
| 
 | |
|     do
 | |
|         compute_liveness(ctx, entry_func);
 | |
|     while (hlsl_transform_ir(ctx, dce, body, NULL));
 | |
| 
 | |
|     compute_liveness(ctx, entry_func);
 | |
|     mark_vars_usage(ctx);
 | |
| 
 | |
|     calculate_resource_register_counts(ctx);
 | |
| 
 | |
|     allocate_register_reservations(ctx, &ctx->extern_vars);
 | |
|     allocate_register_reservations(ctx, &entry_func->extern_vars);
 | |
|     allocate_semantic_registers(ctx, entry_func, &output_reg_count);
 | |
| 
 | |
|     if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY)
 | |
|         validate_max_output_size(ctx, entry_func, output_reg_count);
 | |
| }
 | |
| 
 | |
| int hlsl_emit_bytecode(struct hlsl_ctx *ctx, struct hlsl_ir_function_decl *entry_func,
 | |
|         enum vkd3d_shader_target_type target_type, struct vkd3d_shader_code *out)
 | |
| {
 | |
|     const struct hlsl_profile_info *profile = ctx->profile;
 | |
|     struct hlsl_block global_uniform_block;
 | |
|     struct hlsl_ir_var *var;
 | |
| 
 | |
|     parse_entry_function_attributes(ctx, entry_func);
 | |
|     if (ctx->result)
 | |
|         return ctx->result;
 | |
| 
 | |
|     if (profile->type == VKD3D_SHADER_TYPE_HULL)
 | |
|         validate_hull_shader_attributes(ctx, entry_func);
 | |
|     else if (profile->type == VKD3D_SHADER_TYPE_COMPUTE && !ctx->found_numthreads)
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [numthreads] attribute.", entry_func->func->name);
 | |
|     else if (profile->type == VKD3D_SHADER_TYPE_DOMAIN && ctx->domain == VKD3D_TESSELLATOR_DOMAIN_INVALID)
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [domain] attribute.", entry_func->func->name);
 | |
|     else if (profile->type == VKD3D_SHADER_TYPE_GEOMETRY && !ctx->max_vertex_count)
 | |
|         hlsl_error(ctx, &entry_func->loc, VKD3D_SHADER_ERROR_HLSL_MISSING_ATTRIBUTE,
 | |
|                 "Entry point \"%s\" is missing a [maxvertexcount] attribute.", entry_func->func->name);
 | |
| 
 | |
|     hlsl_block_init(&global_uniform_block);
 | |
| 
 | |
|     LIST_FOR_EACH_ENTRY(var, &ctx->globals->vars, struct hlsl_ir_var, scope_entry)
 | |
|     {
 | |
|         if (var->storage_modifiers & HLSL_STORAGE_UNIFORM)
 | |
|             prepend_uniform_copy(ctx, &global_uniform_block, var);
 | |
|     }
 | |
| 
 | |
|     process_entry_function(ctx, &global_uniform_block, entry_func);
 | |
|     if (ctx->result)
 | |
|         return ctx->result;
 | |
| 
 | |
|     if (profile->type == VKD3D_SHADER_TYPE_HULL)
 | |
|     {
 | |
|         process_entry_function(ctx, &global_uniform_block, ctx->patch_constant_func);
 | |
|         if (ctx->result)
 | |
|             return ctx->result;
 | |
|     }
 | |
| 
 | |
|     hlsl_block_cleanup(&global_uniform_block);
 | |
| 
 | |
|     if (profile->major_version < 4)
 | |
|     {
 | |
|         mark_indexable_vars(ctx, entry_func);
 | |
|         allocate_const_registers(ctx, entry_func);
 | |
|         sort_uniforms_by_bind_count(ctx, HLSL_REGSET_SAMPLERS);
 | |
|         allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
 | |
|     }
 | |
|     else
 | |
|     {
 | |
|         allocate_buffers(ctx);
 | |
|         allocate_objects(ctx, entry_func, HLSL_REGSET_TEXTURES);
 | |
|         allocate_objects(ctx, entry_func, HLSL_REGSET_UAVS);
 | |
|         allocate_objects(ctx, entry_func, HLSL_REGSET_SAMPLERS);
 | |
|     }
 | |
| 
 | |
|     if (TRACE_ON())
 | |
|         rb_for_each_entry(&ctx->functions, dump_function, ctx);
 | |
| 
 | |
|     if (ctx->result)
 | |
|         return ctx->result;
 | |
| 
 | |
|     switch (target_type)
 | |
|     {
 | |
|         case VKD3D_SHADER_TARGET_D3D_BYTECODE:
 | |
|         {
 | |
|             uint32_t config_flags = vkd3d_shader_init_config_flags();
 | |
|             struct vkd3d_shader_code ctab = {0};
 | |
|             struct vsir_program program;
 | |
|             int result;
 | |
| 
 | |
|             sm1_generate_ctab(ctx, &ctab);
 | |
|             if (ctx->result)
 | |
|                 return ctx->result;
 | |
| 
 | |
|             sm1_generate_vsir(ctx, entry_func, config_flags, &program);
 | |
|             if (ctx->result)
 | |
|             {
 | |
|                 vsir_program_cleanup(&program);
 | |
|                 vkd3d_shader_free_shader_code(&ctab);
 | |
|                 return ctx->result;
 | |
|             }
 | |
| 
 | |
|             result = d3dbc_compile(&program, config_flags, NULL, &ctab, out, ctx->message_context);
 | |
|             vsir_program_cleanup(&program);
 | |
|             vkd3d_shader_free_shader_code(&ctab);
 | |
|             return result;
 | |
|         }
 | |
| 
 | |
|         case VKD3D_SHADER_TARGET_DXBC_TPF:
 | |
|         {
 | |
|             uint32_t config_flags = vkd3d_shader_init_config_flags();
 | |
|             struct vkd3d_shader_code rdef = {0};
 | |
|             struct vsir_program program;
 | |
|             int result;
 | |
| 
 | |
|             sm4_generate_rdef(ctx, &rdef);
 | |
|             if (ctx->result)
 | |
|                 return ctx->result;
 | |
| 
 | |
|             sm4_generate_vsir(ctx, entry_func, config_flags, &program);
 | |
|             if (ctx->result)
 | |
|             {
 | |
|                 vsir_program_cleanup(&program);
 | |
|                 vkd3d_shader_free_shader_code(&rdef);
 | |
|                 return ctx->result;
 | |
|             }
 | |
| 
 | |
|             result = tpf_compile(&program, config_flags, &rdef, out, ctx->message_context);
 | |
|             vsir_program_cleanup(&program);
 | |
|             vkd3d_shader_free_shader_code(&rdef);
 | |
|             return result;
 | |
|         }
 | |
| 
 | |
|         default:
 | |
|             ERR("Unsupported shader target type %#x.\n", target_type);
 | |
|             return VKD3D_ERROR_INVALID_ARGUMENT;
 | |
|     }
 | |
| }
 |