mirror of
https://github.com/izzy2lost/dolphin.git
synced 2026-03-10 11:48:14 -07:00
Merge pull request #11208 from TellowKrinkle/CPUCull
Cull vertices on the CPU
This commit is contained in:
@@ -214,6 +214,7 @@ public enum BooleanSetting implements AbstractBooleanSetting
|
||||
"SaveTextureCacheToState", true),
|
||||
GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION(Settings.FILE_GFX, Settings.SECTION_GFX_SETTINGS,
|
||||
"PreferVSForLinePointExpansion", false),
|
||||
GFX_CPU_CULL(Settings.FILE_GFX, Settings.SECTION_GFX_SETTINGS, "CPUCull", false),
|
||||
GFX_MODS_ENABLE(Settings.FILE_GFX, Settings.SECTION_GFX_SETTINGS, "EnableMods", false),
|
||||
|
||||
GFX_ENHANCE_FORCE_FILTERING(Settings.FILE_GFX, Settings.SECTION_GFX_ENHANCEMENTS,
|
||||
|
||||
@@ -938,6 +938,8 @@ public final class SettingsFragmentPresenter
|
||||
sl.add(new SwitchSetting(mContext, BooleanSetting.GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION,
|
||||
R.string.prefer_vs_for_point_line_expansion,
|
||||
R.string.prefer_vs_for_point_line_expansion_description));
|
||||
sl.add(new SwitchSetting(mContext, BooleanSetting.GFX_CPU_CULL, R.string.cpu_cull,
|
||||
R.string.cpu_cull_description));
|
||||
sl.add(new SwitchSetting(mContext, BooleanSetting.GFX_HACK_EFB_DEFER_INVALIDATION,
|
||||
R.string.defer_efb_invalidation, R.string.defer_efb_invalidation_description));
|
||||
sl.add(new InvertedSwitchSetting(mContext, BooleanSetting.GFX_HACK_FAST_TEXTURE_SAMPLING,
|
||||
|
||||
@@ -361,6 +361,8 @@
|
||||
<string name="backend_multithreading_description">Enables graphics backend multithreading (Vulkan only). May affect performance. If unsure, leave this checked.</string>
|
||||
<string name="prefer_vs_for_point_line_expansion">Prefer VS for Point/Line Expansion</string>
|
||||
<string name="prefer_vs_for_point_line_expansion_description">On backends that support both using the geometry shader and the vertex shader for expanding points and lines, selects the vertex shader for the job. May affect performance.</string>
|
||||
<string name="cpu_cull">Cull Vertices on the CPU</string>
|
||||
<string name="cpu_cull_description">Cull vertices on the CPU to reduce the number of draw calls required. May affect performance. If unsure, leave this unchecked.</string>
|
||||
<string name="defer_efb_invalidation">Defer EFB Cache Invalidation</string>
|
||||
<string name="defer_efb_invalidation_description">Defers invalidation of the EFB access cache until a GPU synchronization command is executed. May improve performance in some games at the cost of stability. If unsure, leave this unchecked.</string>
|
||||
<string name="manual_texture_sampling">Manual Texture Sampling</string>
|
||||
|
||||
@@ -93,6 +93,7 @@ const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE{
|
||||
{System::GFX, "Settings", "SaveTextureCacheToState"}, true};
|
||||
const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION{
|
||||
{System::GFX, "Settings", "PreferVSForLinePointExpansion"}, false};
|
||||
const Info<bool> GFX_CPU_CULL{{System::GFX, "Settings", "CPUCull"}, false};
|
||||
|
||||
const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS{
|
||||
{System::GFX, "Settings", "ManuallyUploadBuffers"}, TriState::Auto};
|
||||
|
||||
@@ -82,6 +82,7 @@ extern const Info<int> GFX_SHADER_COMPILER_THREADS;
|
||||
extern const Info<int> GFX_SHADER_PRECOMPILER_THREADS;
|
||||
extern const Info<bool> GFX_SAVE_TEXTURE_CACHE_TO_STATE;
|
||||
extern const Info<bool> GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION;
|
||||
extern const Info<bool> GFX_CPU_CULL;
|
||||
|
||||
extern const Info<TriState> GFX_MTL_MANUALLY_UPLOAD_BUFFERS;
|
||||
extern const Info<bool> GFX_MTL_USE_PRESENT_DRAWABLE;
|
||||
|
||||
@@ -632,6 +632,8 @@
|
||||
<ClInclude Include="VideoCommon\CommandProcessor.h" />
|
||||
<ClInclude Include="VideoCommon\ConstantManager.h" />
|
||||
<ClInclude Include="VideoCommon\CPMemory.h" />
|
||||
<ClInclude Include="VideoCommon\CPUCull.h" />
|
||||
<ClInclude Include="VideoCommon\CPUCullImpl.h" />
|
||||
<ClInclude Include="VideoCommon\DataReader.h" />
|
||||
<ClInclude Include="VideoCommon\DriverDetails.h" />
|
||||
<ClInclude Include="VideoCommon\Fifo.h" />
|
||||
@@ -1226,6 +1228,7 @@
|
||||
<ClCompile Include="VideoCommon\BPStructs.cpp" />
|
||||
<ClCompile Include="VideoCommon\CommandProcessor.cpp" />
|
||||
<ClCompile Include="VideoCommon\CPMemory.cpp" />
|
||||
<ClCompile Include="VideoCommon\CPUCull.cpp" />
|
||||
<ClCompile Include="VideoCommon\DriverDetails.cpp" />
|
||||
<ClCompile Include="VideoCommon\Fifo.cpp" />
|
||||
<ClCompile Include="VideoCommon\FramebufferManager.cpp" />
|
||||
|
||||
@@ -159,16 +159,18 @@ void AdvancedWidget::CreateWidgets()
|
||||
m_prefer_vs_for_point_line_expansion = new GraphicsBool(
|
||||
// i18n: VS is short for vertex shaders.
|
||||
tr("Prefer VS for Point/Line Expansion"), Config::GFX_PREFER_VS_FOR_LINE_POINT_EXPANSION);
|
||||
m_cpu_cull = new GraphicsBool(tr("Cull Vertices on the CPU"), Config::GFX_CPU_CULL);
|
||||
|
||||
misc_layout->addWidget(m_enable_cropping, 0, 0);
|
||||
misc_layout->addWidget(m_enable_prog_scan, 0, 1);
|
||||
misc_layout->addWidget(m_backend_multithreading, 1, 0);
|
||||
misc_layout->addWidget(m_prefer_vs_for_point_line_expansion, 1, 1);
|
||||
misc_layout->addWidget(m_cpu_cull, 2, 0);
|
||||
#ifdef _WIN32
|
||||
m_borderless_fullscreen =
|
||||
new GraphicsBool(tr("Borderless Fullscreen"), Config::GFX_BORDERLESS_FULLSCREEN);
|
||||
|
||||
misc_layout->addWidget(m_borderless_fullscreen, 2, 0);
|
||||
misc_layout->addWidget(m_borderless_fullscreen, 2, 1);
|
||||
#endif
|
||||
|
||||
// Experimental.
|
||||
@@ -369,6 +371,10 @@ void AdvancedWidget::AddDescriptions()
|
||||
"for expanding points and lines, selects the vertex shader for the job. May "
|
||||
"affect performance."
|
||||
"<br><br>%1");
|
||||
static const char TR_CPU_CULL_DESCRIPTION[] =
|
||||
QT_TR_NOOP("Cull vertices on the CPU to reduce the number of draw calls required. "
|
||||
"May affect performance and draw statistics.<br><br>"
|
||||
"<dolphin_emphasis>If unsure, leave this unchecked.</dolphin_emphasis>");
|
||||
static const char TR_DEFER_EFB_ACCESS_INVALIDATION_DESCRIPTION[] = QT_TR_NOOP(
|
||||
"Defers invalidation of the EFB access cache until a GPU synchronization command "
|
||||
"is executed. If disabled, the cache will be invalidated with every draw call. "
|
||||
@@ -441,6 +447,7 @@ void AdvancedWidget::AddDescriptions()
|
||||
vsexpand_extra = tr(IF_UNSURE_UNCHECKED);
|
||||
m_prefer_vs_for_point_line_expansion->SetDescription(
|
||||
tr(TR_PREFER_VS_FOR_POINT_LINE_EXPANSION_DESCRIPTION).arg(vsexpand_extra));
|
||||
m_cpu_cull->SetDescription(tr(TR_CPU_CULL_DESCRIPTION));
|
||||
#ifdef _WIN32
|
||||
m_borderless_fullscreen->SetDescription(tr(TR_BORDERLESS_FULLSCREEN_DESCRIPTION));
|
||||
#endif
|
||||
|
||||
@@ -69,6 +69,7 @@ private:
|
||||
ToolTipCheckBox* m_enable_prog_scan;
|
||||
GraphicsBool* m_backend_multithreading;
|
||||
GraphicsBool* m_prefer_vs_for_point_line_expansion;
|
||||
GraphicsBool* m_cpu_cull;
|
||||
GraphicsBool* m_borderless_fullscreen;
|
||||
|
||||
// Experimental
|
||||
|
||||
@@ -23,6 +23,9 @@ add_library(videocommon
|
||||
ConstantManager.h
|
||||
CPMemory.cpp
|
||||
CPMemory.h
|
||||
CPUCull.cpp
|
||||
CPUCull.h
|
||||
CPUCullImpl.h
|
||||
DriverDetails.cpp
|
||||
DriverDetails.h
|
||||
Fifo.cpp
|
||||
|
||||
174
Source/Core/VideoCommon/CPUCull.cpp
Normal file
174
Source/Core/VideoCommon/CPUCull.cpp
Normal file
@@ -0,0 +1,174 @@
|
||||
// Copyright 2022 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#include "VideoCommon/CPUCull.h"
|
||||
|
||||
#include "Common/Assert.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Common/MathUtil.h"
|
||||
#include "Common/MemoryUtil.h"
|
||||
#include "Core/System.h"
|
||||
|
||||
#include "VideoCommon/CPMemory.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
#include "VideoCommon/XFMemory.h"
|
||||
|
||||
// We really want things like c.w * a.x - a.w * c.x to stay symmetric, so they cancel to zero on
|
||||
// degenerate triangles. Make sure the compiler doesn't optimize in fmas where not requested.
|
||||
#ifdef _MSC_VER
|
||||
#pragma fp_contract(off)
|
||||
#else
|
||||
// GCC doesn't support any in-file way to turn off fp contract yet
|
||||
// Not ideal, but worst case scenario its cpu cull is worse at detecting degenerate triangles
|
||||
// (Most likely to happen on arm, as we don't compile the cull code for x86 fma)
|
||||
#pragma GCC diagnostic push
|
||||
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
|
||||
#pragma STDC FP_CONTRACT OFF
|
||||
#pragma GCC diagnostic pop
|
||||
#endif
|
||||
|
||||
#if defined(_M_X86) || defined(_M_X86_64)
|
||||
#define USE_SSE
|
||||
#elif defined(_M_ARM_64)
|
||||
#define USE_NEON
|
||||
#else
|
||||
#define NO_SIMD
|
||||
#endif
|
||||
|
||||
#if defined(USE_SSE)
|
||||
#include <immintrin.h>
|
||||
#elif defined(USE_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
|
||||
#include "VideoCommon/CPUCullImpl.h"
|
||||
#ifdef USE_SSE
|
||||
#define USE_SSE3
|
||||
#include "VideoCommon/CPUCullImpl.h"
|
||||
#define USE_SSE41
|
||||
#include "VideoCommon/CPUCullImpl.h"
|
||||
#define USE_AVX
|
||||
#include "VideoCommon/CPUCullImpl.h"
|
||||
#define USE_FMA
|
||||
#include "VideoCommon/CPUCullImpl.h"
|
||||
#endif
|
||||
|
||||
#if defined(USE_SSE)
|
||||
#if defined(__AVX__) && defined(__FMA__)
|
||||
static constexpr int MIN_SSE = 51;
|
||||
#elif defined(__AVX__)
|
||||
static constexpr int MIN_SSE = 50;
|
||||
#elif defined(__SSE4_1__)
|
||||
static constexpr int MIN_SSE = 41;
|
||||
#elif defined(__SSE3__)
|
||||
static constexpr int MIN_SSE = 30;
|
||||
#else
|
||||
static constexpr int MIN_SSE = 0;
|
||||
#endif
|
||||
#endif
|
||||
|
||||
template <bool PositionHas3Elems, bool PerVertexPosMtx>
|
||||
static CPUCull::TransformFunction GetTransformFunction()
|
||||
{
|
||||
#if defined(USE_SSE)
|
||||
if (MIN_SSE >= 51 || (cpu_info.bAVX && cpu_info.bFMA))
|
||||
return CPUCull_FMA::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
|
||||
else if (MIN_SSE >= 50 || cpu_info.bAVX)
|
||||
return CPUCull_AVX::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
|
||||
else if (PositionHas3Elems && PerVertexPosMtx && (MIN_SSE >= 41 || cpu_info.bSSE4_1))
|
||||
return CPUCull_SSE41::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
|
||||
else if (PositionHas3Elems && (MIN_SSE >= 30 || cpu_info.bSSE3))
|
||||
return CPUCull_SSE3::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
|
||||
else
|
||||
return CPUCull_SSE::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
|
||||
#elif defined(USE_NEON)
|
||||
return CPUCull_NEON::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
|
||||
#else
|
||||
return CPUCull_Scalar::TransformVertices<PositionHas3Elems, PerVertexPosMtx>;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <OpcodeDecoder::Primitive Primitive, CullMode Mode>
|
||||
static CPUCull::CullFunction GetCullFunction0()
|
||||
{
|
||||
#if defined(USE_SSE)
|
||||
// Note: AVX version only actually AVX on compilers that support __attribute__((target))
|
||||
// Sorry, MSVC + Sandy Bridge. (Ivy+ and AMD see very little benefit thanks to mov elimination)
|
||||
if (MIN_SSE >= 50 || cpu_info.bAVX)
|
||||
return CPUCull_AVX::AreAllVerticesCulled<Primitive, Mode>;
|
||||
else if (MIN_SSE >= 30 || cpu_info.bSSE3)
|
||||
return CPUCull_SSE3::AreAllVerticesCulled<Primitive, Mode>;
|
||||
else
|
||||
return CPUCull_SSE::AreAllVerticesCulled<Primitive, Mode>;
|
||||
#elif defined(USE_NEON)
|
||||
return CPUCull_NEON::AreAllVerticesCulled<Primitive, Mode>;
|
||||
#else
|
||||
return CPUCull_Scalar::AreAllVerticesCulled<Primitive, Mode>;
|
||||
#endif
|
||||
}
|
||||
|
||||
template <OpcodeDecoder::Primitive Primitive>
|
||||
static Common::EnumMap<CPUCull::CullFunction, CullMode::All> GetCullFunction1()
|
||||
{
|
||||
return {
|
||||
GetCullFunction0<Primitive, CullMode::None>(),
|
||||
GetCullFunction0<Primitive, CullMode::Back>(),
|
||||
GetCullFunction0<Primitive, CullMode::Front>(),
|
||||
GetCullFunction0<Primitive, CullMode::All>(),
|
||||
};
|
||||
}
|
||||
|
||||
// Declared in the header and defaulted here, in the same file that defines
// BufferDeleter<T>::operator() (presumably so the deleter is visible where the
// transform buffer is destroyed).
CPUCull::~CPUCull() = default;
|
||||
|
||||
void CPUCull::Init()
|
||||
{
|
||||
m_transform_table[false][false] = GetTransformFunction<false, false>();
|
||||
m_transform_table[false][true] = GetTransformFunction<false, true>();
|
||||
m_transform_table[true][false] = GetTransformFunction<true, false>();
|
||||
m_transform_table[true][true] = GetTransformFunction<true, true>();
|
||||
using Prim = OpcodeDecoder::Primitive;
|
||||
m_cull_table[Prim::GX_DRAW_QUADS] = GetCullFunction1<Prim::GX_DRAW_QUADS>();
|
||||
m_cull_table[Prim::GX_DRAW_QUADS_2] = GetCullFunction1<Prim::GX_DRAW_QUADS>();
|
||||
m_cull_table[Prim::GX_DRAW_TRIANGLES] = GetCullFunction1<Prim::GX_DRAW_TRIANGLES>();
|
||||
m_cull_table[Prim::GX_DRAW_TRIANGLE_STRIP] = GetCullFunction1<Prim::GX_DRAW_TRIANGLE_STRIP>();
|
||||
m_cull_table[Prim::GX_DRAW_TRIANGLE_FAN] = GetCullFunction1<Prim::GX_DRAW_TRIANGLE_FAN>();
|
||||
}
|
||||
|
||||
/// Transforms `count` loaded vertices and runs the cull routine matching `primitive` and the
/// current cull mode over them.
///
/// @param loader     Vertex loader whose native vertex declaration describes `src`.
/// @param primitive  Primitive type; must be a quad or triangle primitive (asserted).
/// @param src        Loaded vertex data, `stride` bytes apart as given by the declaration.
/// @param count      Number of vertices in `src`.
/// @return The selected cull routine's verdict for the batch.
bool CPUCull::AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
                                   const u8* src, u32 count)
{
  ASSERT_MSG(VIDEO, primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES,
             "CPUCull should not be called on lines or points");

  const auto& vtx_decl = loader->m_native_vtx_decl;
  const u32 vertex_stride = vtx_decl.stride;
  const bool has_3_elem_position = vtx_decl.position.components >= 3;
  const bool has_per_vertex_posmtx = vtx_decl.posmtx.enable;

  // Grow the scratch buffer (to a power-of-two vertex count) when this batch does not fit.
  if (m_transform_buffer_size < count) [[unlikely]]
  {
    const u32 grown_size = MathUtil::NextPowerOf2(count);
    m_transform_buffer_size = grown_size;
    void* const storage =
        Common::AllocateAlignedMemory(grown_size * sizeof(TransformedVertex), 32);
    m_transform_buffer.reset(static_cast<TransformedVertex*>(storage));
  }

  // The transform functions need the projection matrix to transform to clip space.
  Core::System::GetInstance().GetVertexShaderManager().SetProjectionMatrix();

  // Maps each cull mode to the mode with front and back swapped.
  static constexpr Common::EnumMap<CullMode, CullMode::All> swapped_cullmode = {
      CullMode::None, CullMode::Front, CullMode::Back, CullMode::All};

  CullMode effective_cullmode = bpmem.genMode.cullmode;
  if (xfmem.viewport.ht > 0)  // See videosoftware Clipper.cpp:IsBackface
    effective_cullmode = swapped_cullmode[effective_cullmode];

  TransformedVertex* const transformed = m_transform_buffer.get();

  const TransformFunction transform_fn =
      m_transform_table[has_3_elem_position][has_per_vertex_posmtx];
  transform_fn(transformed, src, vertex_stride, count);

  const CullFunction cull_fn = m_cull_table[primitive][effective_cullmode];
  return cull_fn(transformed, count);
}
|
||||
|
||||
template <typename T>
|
||||
void CPUCull::BufferDeleter<T>::operator()(T* ptr)
|
||||
{
|
||||
Common::FreeAlignedMemory(ptr);
|
||||
}
|
||||
38
Source/Core/VideoCommon/CPUCull.h
Normal file
38
Source/Core/VideoCommon/CPUCull.h
Normal file
@@ -0,0 +1,38 @@
|
||||
// Copyright 2022 Dolphin Emulator Project
|
||||
// SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "VideoCommon/BPMemory.h"
|
||||
#include "VideoCommon/DataReader.h"
|
||||
#include "VideoCommon/OpcodeDecoding.h"
|
||||
|
||||
class CPUCull
|
||||
{
|
||||
public:
|
||||
~CPUCull();
|
||||
void Init();
|
||||
bool AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
|
||||
const u8* src, u32 count);
|
||||
|
||||
struct alignas(16) TransformedVertex
|
||||
{
|
||||
float x, y, z, w;
|
||||
};
|
||||
|
||||
using TransformFunction = void (*)(void*, const void*, u32, int);
|
||||
using CullFunction = bool (*)(const CPUCull::TransformedVertex*, int);
|
||||
|
||||
private:
|
||||
template <typename T>
|
||||
struct BufferDeleter
|
||||
{
|
||||
void operator()(T* ptr);
|
||||
};
|
||||
std::unique_ptr<TransformedVertex[], BufferDeleter<TransformedVertex>> m_transform_buffer;
|
||||
u32 m_transform_buffer_size = 0;
|
||||
std::array<std::array<TransformFunction, 2>, 2> m_transform_table;
|
||||
Common::EnumMap<Common::EnumMap<CullFunction, CullMode::All>,
|
||||
OpcodeDecoder::Primitive::GX_DRAW_TRIANGLE_FAN>
|
||||
m_cull_table;
|
||||
};
|
||||
714
Source/Core/VideoCommon/CPUCullImpl.h
Normal file
714
Source/Core/VideoCommon/CPUCullImpl.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -17,7 +17,7 @@ enum class SrcBlendFactor : u32;
|
||||
enum class ZTexOp : u32;
|
||||
enum class LogicOp : u32;
|
||||
|
||||
struct PixelShaderConstants
|
||||
struct alignas(16) PixelShaderConstants
|
||||
{
|
||||
std::array<int4, 4> colors;
|
||||
std::array<int4, 4> kcolors;
|
||||
@@ -60,7 +60,7 @@ struct PixelShaderConstants
|
||||
LogicOp logic_op_mode;
|
||||
};
|
||||
|
||||
struct VertexShaderConstants
|
||||
struct alignas(16) VertexShaderConstants
|
||||
{
|
||||
u32 components; // .x
|
||||
u32 xfmem_dualTexInfo; // .y
|
||||
@@ -109,7 +109,7 @@ enum class VSExpand : u32
|
||||
Line,
|
||||
};
|
||||
|
||||
struct GeometryShaderConstants
|
||||
struct alignas(16) GeometryShaderConstants
|
||||
{
|
||||
float4 stereoparams;
|
||||
float4 lineptparams;
|
||||
|
||||
@@ -30,6 +30,7 @@
|
||||
#include "VideoCommon/VertexLoaderBase.h"
|
||||
#include "VideoCommon/VertexManagerBase.h"
|
||||
#include "VideoCommon/VertexShaderManager.h"
|
||||
#include "VideoCommon/VideoConfig.h"
|
||||
#include "VideoCommon/XFMemory.h"
|
||||
|
||||
namespace VertexLoaderManager
|
||||
@@ -366,17 +367,33 @@ int RunVertices(int vtx_attr_group, OpcodeDecoder::Primitive primitive, int coun
|
||||
vertex_shader_manager.SetVertexFormat(loader->m_native_components,
|
||||
loader->m_native_vertex_format->GetVertexDeclaration());
|
||||
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze refrence
|
||||
// slope.
|
||||
bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
||||
// CPUCull's performance increase comes from encoding fewer GPU commands, not sending less data
|
||||
// Therefore it's only useful to check if culling could remove a flush
|
||||
const bool can_cpu_cull = g_ActiveConfig.bCPUCull &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES &&
|
||||
!g_vertex_manager->HasSendableVertices();
|
||||
|
||||
DataReader dst = g_vertex_manager->PrepareForAdditionalData(
|
||||
primitive, count, loader->m_native_vtx_decl.stride, cullall);
|
||||
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
|
||||
// They still need to go through vertex loading, because we need to calculate a zfreeze
|
||||
// reference slope.
|
||||
const bool cullall = (bpmem.genMode.cullmode == CullMode::All &&
|
||||
primitive < OpcodeDecoder::Primitive::GX_DRAW_LINES);
|
||||
|
||||
const int stride = loader->m_native_vtx_decl.stride;
|
||||
DataReader dst = g_vertex_manager->PrepareForAdditionalData(primitive, count, stride,
|
||||
cullall || can_cpu_cull);
|
||||
|
||||
count = loader->RunVertices(src, dst.GetPointer(), count);
|
||||
|
||||
if (can_cpu_cull && !cullall)
|
||||
{
|
||||
if (!g_vertex_manager->AreAllVerticesCulled(loader, primitive, dst.GetPointer(), count))
|
||||
{
|
||||
DataReader new_dst = g_vertex_manager->DisableCullAll(stride);
|
||||
memmove(new_dst.GetPointer(), dst.GetPointer(), count * stride);
|
||||
}
|
||||
}
|
||||
|
||||
g_vertex_manager->AddIndices(primitive, count);
|
||||
g_vertex_manager->FlushData(count, loader->m_native_vtx_decl.stride);
|
||||
|
||||
|
||||
@@ -104,6 +104,7 @@ VertexManagerBase::~VertexManagerBase() = default;
|
||||
bool VertexManagerBase::Initialize()
|
||||
{
|
||||
m_index_generator.Init();
|
||||
m_cpu_cull.Init();
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -117,6 +118,13 @@ void VertexManagerBase::AddIndices(OpcodeDecoder::Primitive primitive, u32 num_v
|
||||
m_index_generator.AddIndices(primitive, num_vertices);
|
||||
}
|
||||
|
||||
/// Forwards to the owned CPUCull instance and returns its verdict for the given batch.
bool VertexManagerBase::AreAllVerticesCulled(VertexLoaderBase* loader,
                                             OpcodeDecoder::Primitive primitive, const u8* src,
                                             u32 count)
{
  const bool all_culled = m_cpu_cull.AreAllVerticesCulled(loader, primitive, src, count);
  return all_culled;
}
|
||||
|
||||
DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive primitive,
|
||||
u32 count, u32 stride, bool cullall)
|
||||
{
|
||||
@@ -187,6 +195,16 @@ DataReader VertexManagerBase::PrepareForAdditionalData(OpcodeDecoder::Primitive
|
||||
return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
|
||||
}
|
||||
|
||||
/// Turns cull-all mode off (resetting the buffer for `stride` if it was on) and returns a
/// reader spanning the current buffer position to the end of the buffer.
DataReader VertexManagerBase::DisableCullAll(u32 stride)
{
  const bool was_culling_all = m_cull_all;
  // Clear the flag before resetting, as the reset happens with cull-all already off.
  m_cull_all = false;
  if (was_culling_all)
    ResetBuffer(stride);

  return DataReader(m_cur_buffer_pointer, m_end_buffer_pointer);
}
|
||||
|
||||
void VertexManagerBase::FlushData(u32 count, u32 stride)
|
||||
{
|
||||
m_cur_buffer_pointer += count * stride;
|
||||
@@ -548,6 +566,8 @@ void VertexManagerBase::Flush()
|
||||
// Now the vertices can be flushed to the GPU. Everything following the CommitBuffer() call
|
||||
// must be careful to not upload any utility vertices, as the binding will be lost otherwise.
|
||||
const u32 num_indices = m_index_generator.GetIndexLen();
|
||||
if (num_indices == 0)
|
||||
return;
|
||||
u32 base_vertex, base_index;
|
||||
CommitBuffer(m_index_generator.GetNumVerts(),
|
||||
VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride(), num_indices,
|
||||
|
||||
@@ -9,6 +9,7 @@
|
||||
#include "Common/BitSet.h"
|
||||
#include "Common/CommonTypes.h"
|
||||
#include "Common/MathUtil.h"
|
||||
#include "VideoCommon/CPUCull.h"
|
||||
#include "VideoCommon/IndexGenerator.h"
|
||||
#include "VideoCommon/RenderState.h"
|
||||
#include "VideoCommon/ShaderCache.h"
|
||||
@@ -100,11 +101,18 @@ public:
|
||||
|
||||
PrimitiveType GetCurrentPrimitiveType() const { return m_current_primitive_type; }
|
||||
void AddIndices(OpcodeDecoder::Primitive primitive, u32 num_vertices);
|
||||
bool AreAllVerticesCulled(VertexLoaderBase* loader, OpcodeDecoder::Primitive primitive,
|
||||
const u8* src, u32 count);
|
||||
virtual DataReader PrepareForAdditionalData(OpcodeDecoder::Primitive primitive, u32 count,
|
||||
u32 stride, bool cullall);
|
||||
/// Switch cullall off after a call to PrepareForAdditionalData with cullall true
|
||||
/// Expects that you will add a nonzero number of primitives before the next flush
|
||||
/// Returns a DataReader over the vertex buffer (from the current write position to its end)
|
||||
DataReader DisableCullAll(u32 stride);
|
||||
void FlushData(u32 count, u32 stride);
|
||||
|
||||
void Flush();
|
||||
bool HasSendableVertices() const { return !m_is_flushed && !m_cull_all; }
|
||||
|
||||
void DoState(PointerWrap& p);
|
||||
|
||||
@@ -201,6 +209,7 @@ protected:
|
||||
bool m_cull_all = false;
|
||||
|
||||
IndexGenerator m_index_generator;
|
||||
CPUCull m_cpu_cull;
|
||||
|
||||
private:
|
||||
// Minimum number of draws per command buffer when attempting to preempt a readback operation.
|
||||
|
||||
@@ -65,6 +65,97 @@ void VertexShaderManager::Dirty()
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
/// Rebuilds m_projection_matrix from xfmem.projection and returns it with the viewport
/// correction applied (and, for perspective projections while free look is active, the
/// free-look view as well).
///
/// Side effects: records the matrix in g_stats and marks the free-look controller clean.
/// For an unknown projection type an error is logged and m_projection_matrix is left as-is.
Common::Matrix44 VertexShaderManager::LoadProjectionMatrix()
{
  const auto& raw = xfmem.projection.rawProjection;
  auto& m = m_projection_matrix;

  if (xfmem.projection.type == ProjectionType::Perspective)
  {
    const Common::Vec2 fov_multiplier = !g_freelook_camera.IsActive() ?
                                            Common::Vec2{1, 1} :
                                            g_freelook_camera.GetFieldOfViewMultiplier();

    // Row 0
    m[0] = raw[0] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
    m[1] = 0.0f;
    m[2] = raw[1] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
    m[3] = 0.0f;

    // Row 1
    m[4] = 0.0f;
    m[5] = raw[2] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
    m[6] = raw[3] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
    m[7] = 0.0f;

    // Row 2
    m[8] = 0.0f;
    m[9] = 0.0f;
    m[10] = raw[4];
    m[11] = raw[5];

    // Row 3
    m[12] = 0.0f;
    m[13] = 0.0f;
    m[14] = -1.0f;
    m[15] = 0.0f;

    g_stats.gproj = m;
  }
  else if (xfmem.projection.type == ProjectionType::Orthographic)
  {
    // Row 0
    m[0] = raw[0];
    m[1] = 0.0f;
    m[2] = 0.0f;
    m[3] = raw[1];

    // Row 1
    m[4] = 0.0f;
    m[5] = raw[2];
    m[6] = 0.0f;
    m[7] = raw[3];

    // Row 2
    m[8] = 0.0f;
    m[9] = 0.0f;
    m[10] = raw[4];
    m[11] = raw[5];

    // Row 3
    m[12] = 0.0f;
    m[13] = 0.0f;
    m[14] = 0.0f;
    m[15] = 1.0f;

    g_stats.g2proj = m;
    g_stats.proj = raw;
  }
  else
  {
    ERROR_LOG_FMT(VIDEO, "Unknown projection type: {}", xfmem.projection.type);
  }

  PRIM_LOG("Projection: {} {} {} {} {} {}", raw[0], raw[1], raw[2], raw[3], raw[4], raw[5]);

  auto corrected_matrix = m_viewport_correction * Common::Matrix44::FromArray(m);

  const bool apply_freelook_view =
      g_freelook_camera.IsActive() && xfmem.projection.type == ProjectionType::Perspective;
  if (apply_freelook_view)
    corrected_matrix *= g_freelook_camera.GetView();

  g_freelook_camera.GetController()->SetClean();

  return corrected_matrix;
}
|
||||
|
||||
void VertexShaderManager::SetProjectionMatrix()
|
||||
{
|
||||
if (m_projection_changed || g_freelook_camera.GetController()->IsDirty())
|
||||
{
|
||||
m_projection_changed = false;
|
||||
auto corrected_matrix = LoadProjectionMatrix();
|
||||
memcpy(constants.projection.data(), corrected_matrix.data.data(), 4 * sizeof(float4));
|
||||
}
|
||||
}
|
||||
|
||||
// Syncs the shader constant buffers with xfmem
|
||||
// TODO: A cleaner way to control the matrices without making a mess in the parameters field
|
||||
void VertexShaderManager::SetConstants(const std::vector<std::string>& textures)
|
||||
@@ -317,84 +408,7 @@ void VertexShaderManager::SetConstants(const std::vector<std::string>& textures)
|
||||
m_projection_changed = false;
|
||||
m_projection_graphics_mod_change = !projection_actions.empty();
|
||||
|
||||
const auto& rawProjection = xfmem.projection.rawProjection;
|
||||
|
||||
switch (xfmem.projection.type)
|
||||
{
|
||||
case ProjectionType::Perspective:
|
||||
{
|
||||
const Common::Vec2 fov_multiplier = g_freelook_camera.IsActive() ?
|
||||
g_freelook_camera.GetFieldOfViewMultiplier() :
|
||||
Common::Vec2{1, 1};
|
||||
m_projection_matrix[0] =
|
||||
rawProjection[0] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
|
||||
m_projection_matrix[1] = 0.0f;
|
||||
m_projection_matrix[2] =
|
||||
rawProjection[1] * g_ActiveConfig.fAspectRatioHackW * fov_multiplier.x;
|
||||
m_projection_matrix[3] = 0.0f;
|
||||
|
||||
m_projection_matrix[4] = 0.0f;
|
||||
m_projection_matrix[5] =
|
||||
rawProjection[2] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
|
||||
m_projection_matrix[6] =
|
||||
rawProjection[3] * g_ActiveConfig.fAspectRatioHackH * fov_multiplier.y;
|
||||
m_projection_matrix[7] = 0.0f;
|
||||
|
||||
m_projection_matrix[8] = 0.0f;
|
||||
m_projection_matrix[9] = 0.0f;
|
||||
m_projection_matrix[10] = rawProjection[4];
|
||||
m_projection_matrix[11] = rawProjection[5];
|
||||
|
||||
m_projection_matrix[12] = 0.0f;
|
||||
m_projection_matrix[13] = 0.0f;
|
||||
|
||||
m_projection_matrix[14] = -1.0f;
|
||||
m_projection_matrix[15] = 0.0f;
|
||||
|
||||
g_stats.gproj = m_projection_matrix;
|
||||
}
|
||||
break;
|
||||
|
||||
case ProjectionType::Orthographic:
|
||||
{
|
||||
m_projection_matrix[0] = rawProjection[0];
|
||||
m_projection_matrix[1] = 0.0f;
|
||||
m_projection_matrix[2] = 0.0f;
|
||||
m_projection_matrix[3] = rawProjection[1];
|
||||
|
||||
m_projection_matrix[4] = 0.0f;
|
||||
m_projection_matrix[5] = rawProjection[2];
|
||||
m_projection_matrix[6] = 0.0f;
|
||||
m_projection_matrix[7] = rawProjection[3];
|
||||
|
||||
m_projection_matrix[8] = 0.0f;
|
||||
m_projection_matrix[9] = 0.0f;
|
||||
m_projection_matrix[10] = rawProjection[4];
|
||||
m_projection_matrix[11] = rawProjection[5];
|
||||
|
||||
m_projection_matrix[12] = 0.0f;
|
||||
m_projection_matrix[13] = 0.0f;
|
||||
|
||||
m_projection_matrix[14] = 0.0f;
|
||||
m_projection_matrix[15] = 1.0f;
|
||||
|
||||
g_stats.g2proj = m_projection_matrix;
|
||||
g_stats.proj = rawProjection;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
ERROR_LOG_FMT(VIDEO, "Unknown projection type: {}", xfmem.projection.type);
|
||||
}
|
||||
|
||||
PRIM_LOG("Projection: {} {} {} {} {} {}", rawProjection[0], rawProjection[1], rawProjection[2],
|
||||
rawProjection[3], rawProjection[4], rawProjection[5]);
|
||||
|
||||
auto corrected_matrix =
|
||||
m_viewport_correction * Common::Matrix44::FromArray(m_projection_matrix);
|
||||
|
||||
if (g_freelook_camera.IsActive() && xfmem.projection.type == ProjectionType::Perspective)
|
||||
corrected_matrix *= g_freelook_camera.GetView();
|
||||
auto corrected_matrix = LoadProjectionMatrix();
|
||||
|
||||
GraphicsModActionData::Projection projection{&corrected_matrix};
|
||||
for (auto action : projection_actions)
|
||||
@@ -404,8 +418,6 @@ void VertexShaderManager::SetConstants(const std::vector<std::string>& textures)
|
||||
|
||||
memcpy(constants.projection.data(), corrected_matrix.data.data(), 4 * sizeof(float4));
|
||||
|
||||
g_freelook_camera.GetController()->SetClean();
|
||||
|
||||
dirty = true;
|
||||
}
|
||||
|
||||
|
||||
@@ -24,6 +24,7 @@ public:
|
||||
void DoState(PointerWrap& p);
|
||||
|
||||
// constant management
|
||||
void SetProjectionMatrix();
|
||||
void SetConstants(const std::vector<std::string>& textures);
|
||||
|
||||
void InvalidateXFRange(int start, int end);
|
||||
@@ -64,4 +65,6 @@ private:
|
||||
std::array<int, 2> m_minmax_lights_changed{};
|
||||
|
||||
Common::Matrix44 m_viewport_correction{};
|
||||
|
||||
Common::Matrix44 LoadProjectionMatrix();
|
||||
};
|
||||
|
||||
@@ -113,6 +113,7 @@ void VideoConfig::Refresh()
|
||||
iShaderCompilationMode = Config::Get(Config::GFX_SHADER_COMPILATION_MODE);
|
||||
iShaderCompilerThreads = Config::Get(Config::GFX_SHADER_COMPILER_THREADS);
|
||||
iShaderPrecompilerThreads = Config::Get(Config::GFX_SHADER_PRECOMPILER_THREADS);
|
||||
bCPUCull = Config::Get(Config::GFX_CPU_CULL);
|
||||
|
||||
texture_filtering_mode = Config::Get(Config::GFX_ENHANCE_FORCE_TEXTURE_FILTERING);
|
||||
iMaxAnisotropy = Config::Get(Config::GFX_ENHANCE_MAX_ANISOTROPY);
|
||||
|
||||
@@ -138,6 +138,7 @@ struct VideoConfig final
|
||||
bool bPerfQueriesEnable = false;
|
||||
bool bBBoxEnable = false;
|
||||
bool bForceProgressive = false;
|
||||
bool bCPUCull = false;
|
||||
|
||||
bool bEFBEmulateFormatChanges = false;
|
||||
bool bSkipEFBCopyToRam = false;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user